mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix(OCR): Skip zero area OCR cells for all OCR engines
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
parent
a9b22a8694
commit
6faff146e0
@ -43,6 +43,9 @@ class EasyOcrModel(BaseOcrModel):
|
||||
|
||||
all_ocr_cells = []
|
||||
for ocr_rect in ocr_rects:
|
||||
# Skip zero area boxes
|
||||
if ocr_rect.area() == 0:
|
||||
continue
|
||||
high_res_image = page._backend.get_page_image(
|
||||
scale=self.scale, cropbox=ocr_rect
|
||||
)
|
||||
|
@ -108,6 +108,9 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
|
||||
all_ocr_cells = []
|
||||
for ocr_rect in ocr_rects:
|
||||
# Skip zero area boxes
|
||||
if ocr_rect.area() == 0:
|
||||
continue
|
||||
high_res_image = page._backend.get_page_image(
|
||||
scale=self.scale, cropbox=ocr_rect
|
||||
)
|
||||
|
@ -61,6 +61,9 @@ class TesserOcrModel(BaseOcrModel):
|
||||
|
||||
all_ocr_cells = []
|
||||
for ocr_rect in ocr_rects:
|
||||
# Skip zero area boxes
|
||||
if ocr_rect.area() == 0:
|
||||
continue
|
||||
high_res_image = page._backend.get_page_image(
|
||||
scale=self.scale, cropbox=ocr_rect
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user