diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index b1199d72..1ed1fb05 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -54,7 +54,7 @@ class TesseractOcrCliModel(BaseOcrModel): self._version: Optional[str] = None self._tesseract_languages: Optional[List[str]] = None self._script_prefix: Optional[str] = None - self._is_auto: bool = False + self._is_auto: bool = "auto" in self.options.lang if self.enabled: try: @@ -192,7 +192,6 @@ class TesseractOcrCliModel(BaseOcrModel): decoded_data = output.stdout.decode("utf-8") df_list = pd.read_csv(io.StringIO(decoded_data), header=None) self._tesseract_languages = df_list[0].tolist()[1:] - self._is_auto = "auto" in self._tesseract_languages # Decide the script prefix if any(lang.startswith("script/") for lang in self._tesseract_languages): diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index f826dc2d..d00ba05a 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -41,7 +41,7 @@ class TesseractOcrModel(BaseOcrModel): accelerator_options=accelerator_options, ) self.options: TesseractOcrOptions - + self._is_auto: bool = "auto" in self.options.lang self.scale = 3 # multiplier for 72 dpi == 216 dpi. self.reader = None self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {} @@ -76,8 +76,6 @@ class TesseractOcrModel(BaseOcrModel): if not self._tesserocr_languages: raise ImportError(missing_langs_errmsg) - self._is_auto: bool = "auto" in self._tesserocr_languages - # Initialize the tesseractAPI _log.debug("Initializing TesserOCR: %s", tesseract_version) lang = "+".join(self.options.lang)