From 396dc660770e4cd9d7be5a0b96993219e3c9c3ba Mon Sep 17 00:00:00 2001 From: gvl4 Date: Wed, 19 Mar 2025 11:32:31 +0100 Subject: [PATCH] fix wrong type of text extracted by tesseract_ocr_cli_model Signed-off-by: gvl4 --- docling/models/tesseract_ocr_cli_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 56968a2e..1e7fe039 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -247,7 +247,7 @@ class TesseractOcrCliModel(BaseOcrModel): cell = TextCell( index=ix, - text=text, + text=str(text), orig=text, from_ocr=True, confidence=conf / 100.0,