From a2007053898addac3e112e826f05de3c7bad5f70 Mon Sep 17 00:00:00 2001
From: Christoph Auer
Date: Tue, 7 Jan 2025 19:29:07 +0100
Subject: [PATCH] fix: Correct scaling of debug visualizations, tune OCR

Signed-off-by: Christoph Auer
---
Review note: every bounding box is multiplied by image size / page size
before drawing. x coordinates use scale_x (image.width / page.size.width)
and y coordinates use scale_y (image.height / page.size.height). The
submitted version scaled y0 / cy0 with scale_x in seven places; those are
corrected to scale_y below. Hunk line counts are unchanged; the blob hashes
on the "index" lines were not regenerated.

 docling/models/base_ocr_model.py        | 17 ++++++++--
 docling/models/layout_model.py          | 43 +++++++++++--------------
 docling/models/table_structure_model.py | 20 ++++++++++++
 3 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py
index 38b5e52c..187c6d0f 100644
--- a/docling/models/base_ocr_model.py
+++ b/docling/models/base_ocr_model.py
@@ -12,7 +12,7 @@ from scipy.ndimage import find_objects, label
 
 from docling.datamodel.base_models import Cell, OcrCell, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import OcrOptions
+from docling.datamodel.pipeline_options import EasyOcrOptions, OcrOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 
@@ -138,18 +138,31 @@ class BaseOcrModel(BasePageModel):
 
     def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
         image = copy.deepcopy(page.image)
+        scale_x = image.width / page.size.width
+        scale_y = image.height / page.size.height
+
         draw = ImageDraw.Draw(image, "RGBA")
 
         # Draw OCR rectangles as yellow filled rect
         for rect in ocr_rects:
             x0, y0, x1, y1 = rect.as_tuple()
+            y0 *= scale_y
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
+
             shade_color = (255, 255, 0, 40)  # transparent yellow
             draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
 
         # Draw OCR and programmatic cells
         for tc in page.cells:
             x0, y0, x1, y1 = tc.bbox.as_tuple()
-            color = "red"
+            y0 *= scale_y
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
+
+            color = "gray"
             if isinstance(tc, OcrCell):
                 color = "magenta"
             draw.rectangle([(x0, y0), (x1, y1)], outline=color)
diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py
index 014cddd3..c1b7dabe 100644
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -67,29 +67,9 @@ class LayoutModel(BasePageModel):
         - Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
         Includes label names and confidence scores for each cluster.
         """
-        label_to_color = {
-            DocItemLabel.TEXT: (255, 255, 153),  # Light Yellow
-            DocItemLabel.CAPTION: (255, 204, 153),  # Light Orange
-            DocItemLabel.LIST_ITEM: (153, 153, 255),  # Light Purple
-            DocItemLabel.FORMULA: (192, 192, 192),  # Gray
-            DocItemLabel.TABLE: (255, 204, 204),  # Light Pink
-            DocItemLabel.PICTURE: (255, 204, 164),  # Light Beige
-            DocItemLabel.SECTION_HEADER: (255, 153, 153),  # Light Red
-            DocItemLabel.PAGE_HEADER: (204, 255, 204),  # Light Green
-            DocItemLabel.PAGE_FOOTER: (
-                204,
-                255,
-                204,
-            ),  # Light Green (same as Page-Header)
-            DocItemLabel.TITLE: (255, 153, 153),  # Light Red (same as Section-Header)
-            DocItemLabel.FOOTNOTE: (200, 200, 255),  # Light Blue
-            DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),  # Light Gray
-            DocItemLabel.CODE: (125, 125, 125),  # Gray
-            DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193),  # Pale Green
-            DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193),  # Light Pink
-            DocItemLabel.FORM: (200, 255, 255),  # Light Cyan
-            DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),  # Rusty orange
-        }
+        scale_x = page.image.width / page.size.width
+        scale_y = page.image.height / page.size.height
+
         # Filter clusters for left and right images
         exclude_labels = {
             DocItemLabel.FORM,
@@ -118,6 +98,11 @@ class LayoutModel(BasePageModel):
                     cell_color = (0, 0, 0, 40)  # Transparent black for cells
                     for tc in c.cells:
                         cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
+                        cx0 *= scale_x
+                        cx1 *= scale_x
+                        cy0 *= scale_y
+                        cy1 *= scale_y
+
                         draw.rectangle(
                             [(cx0, cy0), (cx1, cy1)],
                             outline=None,
@@ -125,8 +110,16 @@ class LayoutModel(BasePageModel):
                         )
                     # Draw cluster rectangle
                     x0, y0, x1, y1 = c.bbox.as_tuple()
-                    cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
-                    cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
+                    x0 *= scale_x
+                    x1 *= scale_x
+                    y0 *= scale_y
+                    y1 *= scale_y
+
+                    cluster_fill_color = (*list(DocItemLabel.get_color(c.label)), 70)
+                    cluster_outline_color = (
+                        *list(DocItemLabel.get_color(c.label)),
+                        255,
+                    )
                     draw.rectangle(
                         [(x0, y0), (x1, y1)],
                         outline=cluster_outline_color,
diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py
index ba306449..1f4fbc7e 100644
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@@ -66,23 +66,43 @@ class TableStructureModel(BasePageModel):
         show: bool = False,
     ):
         assert page._backend is not None
+        assert page.size is not None
 
         image = (
             page._backend.get_page_image()
         )  # make new image to avoid drawing on the saved ones
+
+        scale_x = image.width / page.size.width
+        scale_y = image.height / page.size.height
+
         draw = ImageDraw.Draw(image)
 
         for table_element in tbl_list:
             x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
+            y0 *= scale_y
+            y1 *= scale_y
+            x0 *= scale_x
+            x1 *= scale_x
+
             draw.rectangle([(x0, y0), (x1, y1)], outline="red")
 
             for cell in table_element.cluster.cells:
                 x0, y0, x1, y1 = cell.bbox.as_tuple()
+                x0 *= scale_x
+                x1 *= scale_x
+                y0 *= scale_y
+                y1 *= scale_y
+
                 draw.rectangle([(x0, y0), (x1, y1)], outline="green")
 
             for tc in table_element.table_cells:
                 if tc.bbox is not None:
                     x0, y0, x1, y1 = tc.bbox.as_tuple()
+                    x0 *= scale_x
+                    x1 *= scale_x
+                    y0 *= scale_y
+                    y1 *= scale_y
+
                     if tc.column_header:
                         width = 3
                     else: