fix(ocr): fix TesseractOcrCliModel._is_auto computation

This commit is contained in:
Clément Doumouro 2025-05-20 20:42:57 +02:00
parent bac5ce6e38
commit 30f9570e6e
2 changed files with 2 additions and 5 deletions

View File

@@ -54,7 +54,7 @@ class TesseractOcrCliModel(BaseOcrModel):
self._version: Optional[str] = None
self._tesseract_languages: Optional[List[str]] = None
self._script_prefix: Optional[str] = None
- self._is_auto: bool = False
+ self._is_auto: bool = "auto" in self.options.lang
if self.enabled:
try:
@@ -192,7 +192,6 @@ class TesseractOcrCliModel(BaseOcrModel):
decoded_data = output.stdout.decode("utf-8")
df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
self._tesseract_languages = df_list[0].tolist()[1:]
- self._is_auto = "auto" in self._tesseract_languages
# Decide the script prefix
if any(lang.startswith("script/") for lang in self._tesseract_languages):

View File

@@ -41,7 +41,7 @@ class TesseractOcrModel(BaseOcrModel):
accelerator_options=accelerator_options,
)
self.options: TesseractOcrOptions
self._is_auto: bool = "auto" in self.options.lang
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
self.reader = None
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
@@ -76,8 +76,6 @@ class TesseractOcrModel(BaseOcrModel):
if not self._tesserocr_languages:
raise ImportError(missing_langs_errmsg)
- self._is_auto: bool = "auto" in self._tesserocr_languages
# Initialize the tesseractAPI
_log.debug("Initializing TesserOCR: %s", tesseract_version)
lang = "+".join(self.options.lang)