fix(tesseract): initialize df_osd to avoid uninitialized variable error (#1718)

* fix: initialize df_osd to avoid uninitialized variable error

Signed-off-by: IoannisMaras <maras2002@gmail.com>

* Fix formatting

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Satisfy mypy, regenerate OCR tests

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: IoannisMaras <maras2002@gmail.com>
Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Maras Ioannis
2025-06-10 11:57:45 +03:00
committed by GitHub
parent f7f31137f1
commit e979750ce9
20 changed files with 752 additions and 751 deletions

View File

@@ -99,12 +99,12 @@ class TesseractOcrCliModel(BaseOcrModel):
return name, version
-def _run_tesseract(self, ifilename: str, osd: pd.DataFrame):
+def _run_tesseract(self, ifilename: str, osd: Optional[pd.DataFrame]):
r"""
Run tesseract CLI
"""
cmd = [self.options.tesseract_cmd]
-if self._is_auto:
+if self._is_auto and osd is not None:
lang = self._parse_language(osd)
if lang is not None:
cmd.append("-l")
@@ -231,6 +231,7 @@ class TesseractOcrCliModel(BaseOcrModel):
fname = image_file.name
high_res_image.save(image_file)
doc_orientation = 0
+df_osd: Optional[pd.DataFrame] = None
try:
df_osd = self._perform_osd(fname)
doc_orientation = _parse_orientation(df_osd)