fix(ocr): use PSM integer values directly instead of constructor (#2578)

* fix(ocr): use PSM integer values directly instead of constructor

- Use integer psm value directly instead of calling tesserocr.PSM()
- Apply the change to both the main_psm and script_readers initialization
- tesserocr.PSM is a class of integer constants, not an enum, so it cannot be called like a constructor

Fixes #2576

* DCO Remediation Commit for mulgyeol <mulgyeoljung@gmail.com>

I, mulgyeol <mulgyeoljung@gmail.com>, hereby add my Signed-off-by to this commit: da63a17a3c

Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>

---------

Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>
This commit is contained in:
정물결
2025-11-05 03:32:41 +09:00
committed by GitHub
parent 32a5aed5ea
commit 1a5146abc9
2 changed files with 3 additions and 4 deletions

View File

@@ -97,9 +97,7 @@ class TesseractOcrModel(BaseOcrModel):
# Set main OCR reader with configurable PSM
main_psm = (
tesserocr.PSM(self.options.psm)
if self.options.psm is not None
else tesserocr.PSM.AUTO
self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO
)
if lang == "auto":
self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs)
@@ -195,7 +193,7 @@ class TesseractOcrModel(BaseOcrModel):
tesserocr.PyTessBaseAPI(
path=self.reader.GetDatapath(),
lang=lang,
psm=tesserocr.PSM(self.options.psm)
psm=self.options.psm
if self.options.psm is not None
else tesserocr.PSM.AUTO,
init=True,

View File

@@ -63,6 +63,7 @@ def test_e2e_conversions():
(TesseractOcrOptions(), True),
(TesseractCliOcrOptions(), True),
(EasyOcrOptions(), False),
(TesseractOcrOptions(psm=3), True),
(TesseractOcrOptions(force_full_page_ocr=True), True),
(TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
(TesseractCliOcrOptions(force_full_page_ocr=True), True),