mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
chore(ocr): improve logging in case of OSD failure in TesseractOcrCliModel
and TesseractOcrModel
This commit is contained in:
parent
30f9570e6e
commit
1ce40a7097
@ -235,11 +235,6 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
df_osd = self._perform_osd(fname)
|
df_osd = self._perform_osd(fname)
|
||||||
doc_orientation = _parse_orientation(df_osd)
|
doc_orientation = _parse_orientation(df_osd)
|
||||||
except subprocess.CalledProcessError as exc:
|
except subprocess.CalledProcessError as exc:
|
||||||
if self._is_auto:
|
|
||||||
# OSD is required in auto mode, skipping
|
|
||||||
continue
|
|
||||||
# Proceed to OCR in the hope OCR will succeed while
|
|
||||||
# OSD failed
|
|
||||||
_log.error(
|
_log.error(
|
||||||
"OSD failed (doc %s, page: %s, "
|
"OSD failed (doc %s, page: %s, "
|
||||||
"OCR rectangle: %s, processed image file %s):\n %s",
|
"OCR rectangle: %s, processed image file %s):\n %s",
|
||||||
@ -249,6 +244,10 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
image_file,
|
image_file,
|
||||||
exc.stderr,
|
exc.stderr,
|
||||||
)
|
)
|
||||||
|
# Skip if OSD fails in auto mode; otherwise proceed
|
||||||
|
# to OCR in the hope that OCR succeeds even though OSD failed
|
||||||
|
if self._is_auto:
|
||||||
|
continue
|
||||||
if doc_orientation != 0:
|
if doc_orientation != 0:
|
||||||
high_res_image = high_res_image.rotate(
|
high_res_image = high_res_image.rotate(
|
||||||
-doc_orientation, expand=True
|
-doc_orientation, expand=True
|
||||||
|
@ -147,11 +147,6 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
osd = self.osd_reader.DetectOrientationScript()
|
osd = self.osd_reader.DetectOrientationScript()
|
||||||
# No text, or Orientation and Script detection failure
|
# No text, or Orientation and Script detection failure
|
||||||
if osd is None:
|
if osd is None:
|
||||||
if self._is_auto:
|
|
||||||
# OSD is required in auto mode, skipping
|
|
||||||
continue
|
|
||||||
# Proceed to OCR in the hope OCR will succeed while
|
|
||||||
# OSD failed
|
|
||||||
_log.error(
|
_log.error(
|
||||||
"OSD failed for doc (doc %s, page: %s, "
|
"OSD failed for doc (doc %s, page: %s, "
|
||||||
"OCR rectangle: %s)",
|
"OCR rectangle: %s)",
|
||||||
@ -159,6 +154,10 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
page_i,
|
page_i,
|
||||||
ocr_rect_i,
|
ocr_rect_i,
|
||||||
)
|
)
|
||||||
|
# Skip if OSD fails in auto mode; otherwise proceed
|
||||||
|
# to OCR in the hope that OCR succeeds even though OSD failed
|
||||||
|
if self._is_auto:
|
||||||
|
continue
|
||||||
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
|
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
|
||||||
if doc_orientation != 0:
|
if doc_orientation != 0:
|
||||||
high_res_image = high_res_image.rotate(
|
high_res_image = high_res_image.rotate(
|
||||||
|
Loading…
Reference in New Issue
Block a user