mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
fix(ocr): fix TesseractOcrCliModel._is_auto computation
This commit is contained in:
parent
bac5ce6e38
commit
30f9570e6e
@@ -54,7 +54,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
self._version: Optional[str] = None
|
self._version: Optional[str] = None
|
||||||
self._tesseract_languages: Optional[List[str]] = None
|
self._tesseract_languages: Optional[List[str]] = None
|
||||||
self._script_prefix: Optional[str] = None
|
self._script_prefix: Optional[str] = None
|
||||||
self._is_auto: bool = False
|
self._is_auto: bool = "auto" in self.options.lang
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
try:
|
try:
|
||||||
@@ -192,7 +192,6 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
decoded_data = output.stdout.decode("utf-8")
|
decoded_data = output.stdout.decode("utf-8")
|
||||||
df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
|
df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
|
||||||
self._tesseract_languages = df_list[0].tolist()[1:]
|
self._tesseract_languages = df_list[0].tolist()[1:]
|
||||||
self._is_auto = "auto" in self._tesseract_languages
|
|
||||||
|
|
||||||
# Decide the script prefix
|
# Decide the script prefix
|
||||||
if any(lang.startswith("script/") for lang in self._tesseract_languages):
|
if any(lang.startswith("script/") for lang in self._tesseract_languages):
|
||||||
|
@@ -41,7 +41,7 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
accelerator_options=accelerator_options,
|
accelerator_options=accelerator_options,
|
||||||
)
|
)
|
||||||
self.options: TesseractOcrOptions
|
self.options: TesseractOcrOptions
|
||||||
|
self._is_auto: bool = "auto" in self.options.lang
|
||||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||||
self.reader = None
|
self.reader = None
|
||||||
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
||||||
@@ -76,8 +76,6 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
if not self._tesserocr_languages:
|
if not self._tesserocr_languages:
|
||||||
raise ImportError(missing_langs_errmsg)
|
raise ImportError(missing_langs_errmsg)
|
||||||
|
|
||||||
self._is_auto: bool = "auto" in self._tesserocr_languages
|
|
||||||
|
|
||||||
# Initialize the tesseractAPI
|
# Initialize the tesseractAPI
|
||||||
_log.debug("Initializing TesserOCR: %s", tesseract_version)
|
_log.debug("Initializing TesserOCR: %s", tesseract_version)
|
||||||
lang = "+".join(self.options.lang)
|
lang = "+".join(self.options.lang)
|
||||||
|
Loading…
Reference in New Issue
Block a user