From 4c741b53fa5df9509fc125afd91a1c8fc94730d5 Mon Sep 17 00:00:00 2001
From: Guilhem VERMOREL <83694424+guilhemvermorel@users.noreply.github.com>
Date: Mon, 31 Mar 2025 10:53:49 +0200
Subject: [PATCH] fix: Tesseract OCR CLI can't process images composed with
 numbers only (#1201)

Fix the wrong type of the text extracted by tesseract_ocr_cli_model: cast the OCR text to str so that images containing only numbers can be processed.

Signed-off-by: gvl4 <Guilhem.VERMOREL@3ds.com>
Co-authored-by: gvl4 <Guilhem.VERMOREL@3ds.com>
Signed-off-by: Benichou <fbenichou@deloitte.ca>
---
 docling/models/tesseract_ocr_cli_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py
index 56968a2e..1e7fe039 100644
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -247,7 +247,7 @@ class TesseractOcrCliModel(BaseOcrModel):
 
                             cell = TextCell(
                                 index=ix,
-                                text=text,
+                                text=str(text),
                                 orig=text,
                                 from_ocr=True,
                                 confidence=conf / 100.0,