mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix(ocr): fix TesseractOcrCliModel._is_auto computation
This commit is contained in:
parent
bac5ce6e38
commit
30f9570e6e
@@ -54,7 +54,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
self._version: Optional[str] = None
|
||||
self._tesseract_languages: Optional[List[str]] = None
|
||||
self._script_prefix: Optional[str] = None
|
||||
- self._is_auto: bool = False
|
||||
+ self._is_auto: bool = "auto" in self.options.lang
|
||||
|
||||
if self.enabled:
|
||||
try:
|
||||
@@ -192,7 +192,6 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
decoded_data = output.stdout.decode("utf-8")
|
||||
df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
|
||||
self._tesseract_languages = df_list[0].tolist()[1:]
|
||||
- self._is_auto = "auto" in self._tesseract_languages
|
||||
|
||||
# Decide the script prefix
|
||||
if any(lang.startswith("script/") for lang in self._tesseract_languages):
|
||||
|
@@ -41,7 +41,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
accelerator_options=accelerator_options,
|
||||
)
|
||||
self.options: TesseractOcrOptions
|
||||
|
||||
+ self._is_auto: bool = "auto" in self.options.lang
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
self.reader = None
|
||||
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
||||
@@ -76,8 +76,6 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
if not self._tesserocr_languages:
|
||||
raise ImportError(missing_langs_errmsg)
|
||||
|
||||
- self._is_auto: bool = "auto" in self._tesserocr_languages
|
||||
|
||||
# Initialize the tesseractAPI
|
||||
_log.debug("Initializing TesserOCR: %s", tesseract_version)
|
||||
lang = "+".join(self.options.lang)
|
||||
|
Loading…
Reference in New Issue
Block a user