mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
fix(tesseract): initialize df_osd to avoid uninitialized variable error (#1718)
* fix: initialize df_osd to avoid uninitialized variable error

  Signed-off-by: IoannisMaras <maras2002@gmail.com>

* Fix formatting

  Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Satisfy mypy, regenerate OCR tests

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: IoannisMaras <maras2002@gmail.com>
Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -99,12 +99,12 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
|
||||
return name, version
|
||||
|
||||
def _run_tesseract(self, ifilename: str, osd: pd.DataFrame):
|
||||
def _run_tesseract(self, ifilename: str, osd: Optional[pd.DataFrame]):
|
||||
r"""
|
||||
Run tesseract CLI
|
||||
"""
|
||||
cmd = [self.options.tesseract_cmd]
|
||||
if self._is_auto:
|
||||
if self._is_auto and osd is not None:
|
||||
lang = self._parse_language(osd)
|
||||
if lang is not None:
|
||||
cmd.append("-l")
|
||||
@@ -231,6 +231,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
fname = image_file.name
|
||||
high_res_image.save(image_file)
|
||||
doc_orientation = 0
|
||||
df_osd: Optional[pd.DataFrame] = None
|
||||
try:
|
||||
df_osd = self._perform_osd(fname)
|
||||
doc_orientation = _parse_orientation(df_osd)
|
||||
|
||||
Reference in New Issue
Block a user