From 67441ca4188d532c79df788c461e7f6f7d2f8170 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Tue, 22 Jul 2025 17:02:12 +0200 Subject: [PATCH 1/2] fix: Keep formula clusters also when empty (#1970) Keep formula clusters also when empty Signed-off-by: Christoph Auer --- docling/utils/layout_postprocessor.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docling/utils/layout_postprocessor.py b/docling/utils/layout_postprocessor.py index effce01b..edc6b396 100644 --- a/docling/utils/layout_postprocessor.py +++ b/docling/utils/layout_postprocessor.py @@ -267,9 +267,14 @@ class LayoutPostprocessor: # Initial cell assignment clusters = self._assign_cells_to_clusters(clusters) - # Remove clusters with no cells (if keep_empty_clusters is False) + # Remove clusters with no cells (if keep_empty_clusters is False), + # but always keep clusters with label DocItemLabel.FORMULA if not self.options.keep_empty_clusters: - clusters = [cluster for cluster in clusters if cluster.cells] + clusters = [ + cluster + for cluster in clusters + if cluster.cells or cluster.label == DocItemLabel.FORMULA + ] # Handle orphaned cells unassigned = self._find_unassigned_cells(clusters) From ec971bbe684fe504ca3a2c3a3f2939c5236af051 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 22 Jul 2025 16:45:48 +0000 Subject: [PATCH 2/2] chore: bump version to 2.42.1 [skip ci] --- CHANGELOG.md | 11 +++++++++++ pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a39a319..dd313163 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## [v2.42.1](https://github.com/docling-project/docling/releases/tag/v2.42.1) - 2025-07-22 + +### Fix + +* Keep formula clusters also when empty ([#1970](https://github.com/docling-project/docling/issues/1970)) ([`67441ca`](https://github.com/docling-project/docling/commit/67441ca4188d532c79df788c461e7f6f7d2f8170)) + +### Documentation + +* Enrich existing DoclingDocument ([#1969](https://github.com/docling-project/docling/issues/1969)) ([`90a7cc4`](https://github.com/docling-project/docling/commit/90a7cc4bdda7272cd87d6f4ab3c0b7966f6e9c73)) +* Add documentation for confidence scores ([#1912](https://github.com/docling-project/docling/issues/1912)) ([`5d98bce`](https://github.com/docling-project/docling/commit/5d98bcea1bd03aff426f903211c931620ff8fcc1)) + ## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18 ### Feature diff --git a/pyproject.toml b/pyproject.toml index 72a8bfa2..c472e7c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "docling" -version = "2.42.0" # DO NOT EDIT, updated automatically +version = "2.42.1" # DO NOT EDIT, updated automatically description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." license = "MIT" keywords = [ diff --git a/uv.lock b/uv.lock index 6fb6e572..c790638a 100644 --- a/uv.lock +++ b/uv.lock @@ -806,7 +806,7 @@ wheels = [ [[package]] name = "docling" -version = "2.42.0" +version = "2.42.1" source = { editable = "." } dependencies = [ { name = "accelerate" },