mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
chore(ocr): improve logging in case of OSD failure in TesseractOcrCliModel
and TesseractOcrModel
This commit is contained in:
parent
30f9570e6e
commit
1ce40a7097
@ -235,11 +235,6 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
df_osd = self._perform_osd(fname)
|
||||
doc_orientation = _parse_orientation(df_osd)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
if self._is_auto:
|
||||
# OSD is required in auto mode, skipping
|
||||
continue
|
||||
# Proceed to OCR in the hope OCR will succeed while
|
||||
# OSD failed
|
||||
_log.error(
|
||||
"OSD failed (doc %s, page: %s, "
|
||||
"OCR rectangle: %s, processed image file %s):\n %s",
|
||||
@ -249,6 +244,10 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
image_file,
|
||||
exc.stderr,
|
||||
)
|
||||
# Skip if OSD fails in auto mode; otherwise proceed
|
||||
# to OCR in the hope that OCR will succeed even though OSD failed
|
||||
if self._is_auto:
|
||||
continue
|
||||
if doc_orientation != 0:
|
||||
high_res_image = high_res_image.rotate(
|
||||
-doc_orientation, expand=True
|
||||
|
@ -147,11 +147,6 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
osd = self.osd_reader.DetectOrientationScript()
|
||||
# No text, or Orientation and Script detection failure
|
||||
if osd is None:
|
||||
if self._is_auto:
|
||||
# OSD is required in auto mode, skipping
|
||||
continue
|
||||
# Proceed to OCR in the hope OCR will succeed while
|
||||
# OSD failed
|
||||
_log.error(
|
||||
"OSD failed for doc (doc %s, page: %s, "
|
||||
"OCR rectangle: %s)",
|
||||
@ -159,6 +154,10 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
page_i,
|
||||
ocr_rect_i,
|
||||
)
|
||||
# Skip if OSD fails in auto mode; otherwise proceed
|
||||
# to OCR in the hope that OCR will succeed even though OSD failed
|
||||
if self._is_auto:
|
||||
continue
|
||||
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
|
||||
if doc_orientation != 0:
|
||||
high_res_image = high_res_image.rotate(
|
||||
|
Loading…
Reference in New Issue
Block a user