mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix: Tesseract OCR CLI can't process images composed of numbers only (#1201)
Fix the wrong type of the text extracted by tesseract_ocr_cli_model. Signed-off-by: gvl4 <Guilhem.VERMOREL@3ds.com> Co-authored-by: gvl4 <Guilhem.VERMOREL@3ds.com> Signed-off-by: Benichou <fbenichou@deloitte.ca>
This commit is contained in:
parent
44f2b081ec
commit
4c741b53fa
@@ -247,7 +247,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
|
|
||||||
cell = TextCell(
|
cell = TextCell(
|
||||||
index=ix,
|
index=ix,
|
||||||
text=text,
|
text=str(text),
|
||||||
orig=text,
|
orig=text,
|
||||||
from_ocr=True,
|
from_ocr=True,
|
||||||
confidence=conf / 100.0,
|
confidence=conf / 100.0,
|
||||||
|
Loading…
Reference in New Issue
Block a user