chore(ocr): improve logging in case of OSD failure in TesseractOcrCliModel and TesseractOcrModel

This commit is contained in:
Clément Doumouro 2025-05-21 11:16:08 +02:00
parent 30f9570e6e
commit 1ce40a7097
2 changed files with 8 additions and 10 deletions

View File

@ -235,11 +235,6 @@ class TesseractOcrCliModel(BaseOcrModel):
df_osd = self._perform_osd(fname)
doc_orientation = _parse_orientation(df_osd)
except subprocess.CalledProcessError as exc:
if self._is_auto:
# OSD is required in auto mode, skipping
continue
# Proceed to OCR in the hope OCR will succeed while
# OSD failed
_log.error(
"OSD failed (doc %s, page: %s, "
"OCR rectangle: %s, processed image file %s):\n %s",
@ -249,6 +244,10 @@ class TesseractOcrCliModel(BaseOcrModel):
image_file,
exc.stderr,
)
# Skip if OSD fails in auto mode; otherwise proceed to OCR in the
# hope that OCR will succeed even though OSD failed
if self._is_auto:
continue
if doc_orientation != 0:
high_res_image = high_res_image.rotate(
-doc_orientation, expand=True

View File

@ -147,11 +147,6 @@ class TesseractOcrModel(BaseOcrModel):
osd = self.osd_reader.DetectOrientationScript()
# No text, or Orientation and Script detection failure
if osd is None:
if self._is_auto:
# OSD is required in auto mode, skipping
continue
# Proceed to OCR in the hope OCR will succeed while
# OSD failed
_log.error(
"OSD failed for doc (doc %s, page: %s, "
"OCR rectangle: %s)",
@ -159,6 +154,10 @@ class TesseractOcrModel(BaseOcrModel):
page_i,
ocr_rect_i,
)
# Skip if OSD fails in auto mode; otherwise proceed to OCR in the
# hope that OCR will succeed even though OSD failed
if self._is_auto:
continue
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
if doc_orientation != 0:
high_res_image = high_res_image.rotate(