fix(ocr): use PSM integer values directly instead of constructor (#2578)

* fix(ocr): use PSM integer values directly instead of constructor

- Use integer psm value directly instead of calling tesserocr.PSM()
- Fixed in both main_psm and script_readers initialization
- tesserocr.PSM is a class with integer constants, not an enum

Fixes #2576

* DCO Remediation Commit for mulgyeol <mulgyeoljung@gmail.com>

I, mulgyeol <mulgyeoljung@gmail.com>, hereby add my Signed-off-by to this commit: da63a17a3c

Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>

---------

Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>
This commit is contained in:
정물결
2025-11-05 03:32:41 +09:00
committed by GitHub
parent 32a5aed5ea
commit 1a5146abc9
2 changed files with 3 additions and 4 deletions

View File

@@ -97,9 +97,7 @@ class TesseractOcrModel(BaseOcrModel):
 # Set main OCR reader with configurable PSM
 main_psm = (
-    tesserocr.PSM(self.options.psm)
-    if self.options.psm is not None
-    else tesserocr.PSM.AUTO
+    self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO
 )
 if lang == "auto":
     self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs)
@@ -195,7 +193,7 @@ class TesseractOcrModel(BaseOcrModel):
 tesserocr.PyTessBaseAPI(
     path=self.reader.GetDatapath(),
     lang=lang,
-    psm=tesserocr.PSM(self.options.psm)
+    psm=self.options.psm
     if self.options.psm is not None
     else tesserocr.PSM.AUTO,
     init=True,

View File

@@ -63,6 +63,7 @@ def test_e2e_conversions():
 (TesseractOcrOptions(), True),
 (TesseractCliOcrOptions(), True),
 (EasyOcrOptions(), False),
+(TesseractOcrOptions(psm=3), True),
 (TesseractOcrOptions(force_full_page_ocr=True), True),
 (TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
 (TesseractCliOcrOptions(force_full_page_ocr=True), True),