diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index be828c59..e9d919e2 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -97,9 +97,7 @@ class TesseractOcrModel(BaseOcrModel): # Set main OCR reader with configurable PSM main_psm = ( - tesserocr.PSM(self.options.psm) - if self.options.psm is not None - else tesserocr.PSM.AUTO + self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO ) if lang == "auto": self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs) @@ -195,7 +193,7 @@ class TesseractOcrModel(BaseOcrModel): tesserocr.PyTessBaseAPI( path=self.reader.GetDatapath(), lang=lang, - psm=tesserocr.PSM(self.options.psm) + psm=self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO, init=True, diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index 8a25bf95..22c46738 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -63,6 +63,7 @@ def test_e2e_conversions(): (TesseractOcrOptions(), True), (TesseractCliOcrOptions(), True), (EasyOcrOptions(), False), + (TesseractOcrOptions(psm=3), True), (TesseractOcrOptions(force_full_page_ocr=True), True), (TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), (TesseractCliOcrOptions(force_full_page_ocr=True), True),