fix(ocr): use PSM integer values directly instead of constructor

- Pass the integer psm option value directly instead of calling tesserocr.PSM()
- Apply the change to both the main_psm and the script_readers initialization
- tesserocr.PSM is a class with integer constants, not an enum, so it cannot be called as a constructor

Fixes #2576
This commit is contained in:
mulgyeol
2025-11-04 16:17:50 +09:00
parent 3467b0a035
commit da63a17a3c
2 changed files with 3 additions and 4 deletions

View File

@@ -97,9 +97,7 @@ class TesseractOcrModel(BaseOcrModel):
# Set main OCR reader with configurable PSM # Set main OCR reader with configurable PSM
main_psm = ( main_psm = (
tesserocr.PSM(self.options.psm) self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO
if self.options.psm is not None
else tesserocr.PSM.AUTO
) )
if lang == "auto": if lang == "auto":
self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs) self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs)
@@ -195,7 +193,7 @@ class TesseractOcrModel(BaseOcrModel):
tesserocr.PyTessBaseAPI( tesserocr.PyTessBaseAPI(
path=self.reader.GetDatapath(), path=self.reader.GetDatapath(),
lang=lang, lang=lang,
psm=tesserocr.PSM(self.options.psm) psm=self.options.psm
if self.options.psm is not None if self.options.psm is not None
else tesserocr.PSM.AUTO, else tesserocr.PSM.AUTO,
init=True, init=True,

View File

@@ -63,6 +63,7 @@ def test_e2e_conversions():
(TesseractOcrOptions(), True), (TesseractOcrOptions(), True),
(TesseractCliOcrOptions(), True), (TesseractCliOcrOptions(), True),
(EasyOcrOptions(), False), (EasyOcrOptions(), False),
(TesseractOcrOptions(psm=3), True),
(TesseractOcrOptions(force_full_page_ocr=True), True), (TesseractOcrOptions(force_full_page_ocr=True), True),
(TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), (TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
(TesseractCliOcrOptions(force_full_page_ocr=True), True), (TesseractCliOcrOptions(force_full_page_ocr=True), True),