mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-25 19:44:34 +00:00
Merge branch 'main' of github.com:DS4SD/docling into nli/layout_heron
This commit is contained in:
commit
67bf4d47ba
11
CHANGELOG.md
11
CHANGELOG.md
@ -1,3 +1,14 @@
|
|||||||
|
## [v2.42.1](https://github.com/docling-project/docling/releases/tag/v2.42.1) - 2025-07-22
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
|
||||||
|
* Keep formula clusters also when empty ([#1970](https://github.com/docling-project/docling/issues/1970)) ([`67441ca`](https://github.com/docling-project/docling/commit/67441ca4188d532c79df788c461e7f6f7d2f8170))
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
* Enrich existing DoclingDocument ([#1969](https://github.com/docling-project/docling/issues/1969)) ([`90a7cc4`](https://github.com/docling-project/docling/commit/90a7cc4bdda7272cd87d6f4ab3c0b7966f6e9c73))
|
||||||
|
* Add documentation for confidence scores ([#1912](https://github.com/docling-project/docling/issues/1912)) ([`5d98bce`](https://github.com/docling-project/docling/commit/5d98bcea1bd03aff426f903211c931620ff8fcc1))
|
||||||
|
|
||||||
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
||||||
|
|
||||||
### Feature
|
### Feature
|
||||||
|
@ -267,9 +267,14 @@ class LayoutPostprocessor:
|
|||||||
# Initial cell assignment
|
# Initial cell assignment
|
||||||
clusters = self._assign_cells_to_clusters(clusters)
|
clusters = self._assign_cells_to_clusters(clusters)
|
||||||
|
|
||||||
# Remove clusters with no cells (if keep_empty_clusters is False)
|
# Remove clusters with no cells (if keep_empty_clusters is False),
|
||||||
|
# but always keep clusters with label DocItemLabel.FORMULA
|
||||||
if not self.options.keep_empty_clusters:
|
if not self.options.keep_empty_clusters:
|
||||||
clusters = [cluster for cluster in clusters if cluster.cells]
|
clusters = [
|
||||||
|
cluster
|
||||||
|
for cluster in clusters
|
||||||
|
if cluster.cells or cluster.label == DocItemLabel.FORMULA
|
||||||
|
]
|
||||||
|
|
||||||
# Handle orphaned cells
|
# Handle orphaned cells
|
||||||
unassigned = self._find_unassigned_cells(clusters)
|
unassigned = self._find_unassigned_cells(clusters)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "docling"
|
name = "docling"
|
||||||
version = "2.42.0" # DO NOT EDIT, updated automatically
|
version = "2.42.1" # DO NOT EDIT, updated automatically
|
||||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
keywords = [
|
keywords = [
|
||||||
|
Loading…
Reference in New Issue
Block a user