mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(ocr): use PSM integer values directly instead of constructor (#2578)
* fix(ocr): use PSM integer values directly instead of constructor
- Use integer psm value directly instead of calling tesserocr.PSM()
- Fixed in both main_psm and script_readers initialization
- tesserocr.PSM is a class with integer constants, not an enum
Fixes #2576
* DCO Remediation Commit for mulgyeol <mulgyeoljung@gmail.com>
I, mulgyeol <mulgyeoljung@gmail.com>, hereby add my Signed-off-by to this commit: da63a17a3c
Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>
---------
Signed-off-by: mulgyeol <mulgyeoljung@gmail.com>
This commit is contained in:
@@ -97,9 +97,7 @@ class TesseractOcrModel(BaseOcrModel):
 
 # Set main OCR reader with configurable PSM
 main_psm = (
- tesserocr.PSM(self.options.psm)
- if self.options.psm is not None
- else tesserocr.PSM.AUTO
+ self.options.psm if self.options.psm is not None else tesserocr.PSM.AUTO
 )
 if lang == "auto":
 self.reader = tesserocr.PyTessBaseAPI(psm=main_psm, **tesserocr_kwargs)
|
||||
@@ -195,7 +193,7 @@ class TesseractOcrModel(BaseOcrModel):
 tesserocr.PyTessBaseAPI(
 path=self.reader.GetDatapath(),
 lang=lang,
- psm=tesserocr.PSM(self.options.psm)
+ psm=self.options.psm
 if self.options.psm is not None
 else tesserocr.PSM.AUTO,
 init=True,
|
||||
|
||||
@@ -63,6 +63,7 @@ def test_e2e_conversions():
|
||||
(TesseractOcrOptions(), True),
|
||||
(TesseractCliOcrOptions(), True),
|
||||
(EasyOcrOptions(), False),
|
||||
(TesseractOcrOptions(psm=3), True),
|
||||
(TesseractOcrOptions(force_full_page_ocr=True), True),
|
||||
(TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
|
||||
(TesseractCliOcrOptions(force_full_page_ocr=True), True),
|
||||
|
||||
Reference in New Issue
Block a user