From e30a703759b5c427b1c3cc79395f47e524fd3ebe Mon Sep 17 00:00:00 2001
From: jimkarag02 <112767673+jimkarag02@users.noreply.github.com>
Date: Wed, 14 May 2025 16:05:52 +0300
Subject: [PATCH] fix(ocr): orig field in TesseractOcrCliModel as str (#1553)

fix: ensure orig and text are both strings in TesseractOcrCliModel

Signed-off-by: Dimitris Karagatslis
Signed-off-by: Nikhil Khandelwal
---
 docling/models/tesseract_ocr_cli_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py
index 91b4555f..33ca0c10 100644
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -249,7 +249,7 @@ class TesseractOcrCliModel(BaseOcrModel):
 cell = TextCell(
     index=ix,
     text=str(text),
-    orig=text,
+    orig=str(text),
     from_ocr=True,
     confidence=conf / 100.0,
     rect=BoundingRectangle.from_bounding_box(