From 44f2b081ec6ee7cc0e3fcd39f0f87244f98cf875 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 29 Mar 2025 11:56:42 +0000 Subject: [PATCH 1/2] chore: bump version to 2.28.4 [skip ci] --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bb86f66..fdbd2b22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [v2.28.4](https://github.com/docling-project/docling/releases/tag/v2.28.4) - 2025-03-29 + +### Fix + +* Fixes tables when using OCR ([#1261](https://github.com/docling-project/docling/issues/1261)) ([`7afad7e`](https://github.com/docling-project/docling/commit/7afad7e52da642b258edd67f8f4815ea430f05e1)) + ## [v2.28.3](https://github.com/docling-project/docling/releases/tag/v2.28.3) - 2025-03-28 ### Fix diff --git a/pyproject.toml b/pyproject.toml index 3e94e9d8..dd48a9d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" -version = "2.28.3" # DO NOT EDIT, updated automatically +version = "2.28.4" # DO NOT EDIT, updated automatically description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." 
authors = [ "Christoph Auer ", From b3d111a3cdb90b653ddaaa356f9299e9cd39b340 Mon Sep 17 00:00:00 2001 From: Guilhem VERMOREL <83694424+guilhemvermorel@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:53:49 +0200 Subject: [PATCH 2/2] fix: Tesseract OCR CLI can't process images composed with numbers only (#1201) Fix the wrong type of the text extracted by tesseract_ocr_cli_model by casting it to str Signed-off-by: gvl4 Co-authored-by: gvl4 --- docling/models/tesseract_ocr_cli_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 56968a2e..1e7fe039 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -247,7 +247,7 @@ class TesseractOcrCliModel(BaseOcrModel): cell = TextCell( index=ix, - text=text, + text=str(text), orig=text, from_ocr=True, confidence=conf / 100.0,