From 53cff14e830c01539c93d56e2616fa80be4e9293 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Fri, 27 Jun 2025 18:25:09 +0200 Subject: [PATCH] fix: Ensure that TesseractOcrModel does not crash if tesseract OSD is not installed Signed-off-by: Nikos Livathinos --- docling/models/tesseract_ocr_model.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 0d520877..ed6306ba 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel): local_reader = self.reader self.osd_reader.SetImage(high_res_image) + + doc_orientation = 0 osd = self.osd_reader.DetectOrientationScript() + # No text, or Orientation and Script detection failure if osd is None: _log.error( @@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel): # to OCR in the hope OCR will succeed while OSD failed if self._is_auto: continue - doc_orientation = parse_tesseract_orientation(osd["orient_deg"]) - if doc_orientation != 0: - high_res_image = high_res_image.rotate( - -doc_orientation, expand=True + else: + doc_orientation = parse_tesseract_orientation( + osd["orient_deg"] ) + if doc_orientation != 0: + high_res_image = high_res_image.rotate( + -doc_orientation, expand=True + ) if self._is_auto: script = osd["script_name"] script = map_tesseract_script(script)