fix(OCR): Skip zero area OCR cells for all OCR engines

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
Nikos Livathinos 2024-10-07 17:01:47 +02:00
parent a9b22a8694
commit 6faff146e0
3 changed files with 9 additions and 0 deletions

View File

@@ -43,6 +43,9 @@ class EasyOcrModel(BaseOcrModel):
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)

View File

@@ -108,6 +108,9 @@ class TesseractOcrModel(BaseOcrModel):
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)

View File

@@ -61,6 +61,9 @@ class TesserOcrModel(BaseOcrModel):
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)