mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-24 19:14:23 +00:00
Merge branch 'main' of github.com:DS4SD/docling into nli/layout_heron
This commit is contained in:
commit
67bf4d47ba
11
CHANGELOG.md
11
CHANGELOG.md
@@ -1,3 +1,14 @@
|
||||
## [v2.42.1](https://github.com/docling-project/docling/releases/tag/v2.42.1) - 2025-07-22
|
||||
|
||||
### Fix
|
||||
|
||||
* Keep formula clusters also when empty ([#1970](https://github.com/docling-project/docling/issues/1970)) ([`67441ca`](https://github.com/docling-project/docling/commit/67441ca4188d532c79df788c461e7f6f7d2f8170))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Enrich existing DoclingDocument ([#1969](https://github.com/docling-project/docling/issues/1969)) ([`90a7cc4`](https://github.com/docling-project/docling/commit/90a7cc4bdda7272cd87d6f4ab3c0b7966f6e9c73))
|
||||
* Add documentation for confidence scores ([#1912](https://github.com/docling-project/docling/issues/1912)) ([`5d98bce`](https://github.com/docling-project/docling/commit/5d98bcea1bd03aff426f903211c931620ff8fcc1))
|
||||
|
||||
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
||||
|
||||
### Feature
|
||||
|
@@ -267,9 +267,14 @@ class LayoutPostprocessor:
|
||||
# Initial cell assignment
|
||||
clusters = self._assign_cells_to_clusters(clusters)
|
||||
|
||||
# Remove clusters with no cells (if keep_empty_clusters is False)
|
||||
# Remove clusters with no cells (if keep_empty_clusters is False),
|
||||
# but always keep clusters with label DocItemLabel.FORMULA
|
||||
if not self.options.keep_empty_clusters:
|
||||
clusters = [cluster for cluster in clusters if cluster.cells]
|
||||
clusters = [
|
||||
cluster
|
||||
for cluster in clusters
|
||||
if cluster.cells or cluster.label == DocItemLabel.FORMULA
|
||||
]
|
||||
|
||||
# Handle orphaned cells
|
||||
unassigned = self._find_unassigned_cells(clusters)
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling"
|
||||
version = "2.42.0" # DO NOT EDIT, updated automatically
|
||||
version = "2.42.1" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
license = "MIT"
|
||||
keywords = [
|
||||
|
Loading…
Reference in New Issue
Block a user