Visualization code outputs PNGs to debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-30 14:04:27 +00:00 · 2024-10-29 13:53:29 +01:00 · 2024-10-29 13:53:29 +01:00 · e1b83ec485
commit e1b83ec485
parent 0cdccb3da1
9 changed files with 91 additions and 22 deletions
--- a/docling/datamodel/settings.py
+++ b/docling/datamodel/settings.py
@ -36,7 +36,7 @@ class DebugSettings(BaseModel):
    profile_pipeline_timings: bool = False

    # Path used to output debug information.
-    debug_output_path: str = str(Path.cwd())
+    debug_output_path: str = str(Path.cwd() / "debug")


 class AppSettings(BaseSettings):
--- a/docling/models/base_ocr_model.py
+++ b/docling/models/base_ocr_model.py
@ -1,6 +1,7 @@
 import copy
 import logging
 from abc import abstractmethod
+from pathlib import Path
 from typing import Iterable, List

 import numpy as np
@ -12,6 +13,7 @@ from scipy.ndimage import find_objects, label
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import OcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel

 _log = logging.getLogger(__name__)
@ -115,7 +117,7 @@ class BaseOcrModel(BasePageModel):
        ]
        return filtered_ocr_cells

-    def draw_ocr_rects_and_cells(self, page, ocr_rects):
+    def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
        image = copy.deepcopy(page.image)
        draw = ImageDraw.Draw(image, "RGBA")

@ -132,7 +134,18 @@ class BaseOcrModel(BasePageModel):
            if isinstance(tc, OcrCell):
                color = "magenta"
            draw.rectangle([(x0, y0), (x1, y1)], outline=color)
-        image.show()
+
+        if show:
+            image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
+            )
+            out_path.mkdir(parents=True, exist_ok=True)
+
+            out_file = out_path / f"ocr_page_{page.page_no:05}.png"
+            image.save(str(out_file), format="png")

    @abstractmethod
    def __call__(
--- a/docling/models/ds_glm_model.py
+++ b/docling/models/ds_glm_model.py
@ -1,5 +1,6 @@
 import copy
 import random
+from pathlib import Path
 from typing import List, Union

 from deepsearch_glm.nlp_utils import init_nlp_model
@ -27,6 +28,7 @@ from pydantic import BaseModel, ConfigDict

 from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
 from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
+from docling.datamodel.settings import settings
 from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import create_hash

@ -236,15 +238,15 @@ class GlmModel:
            docling_doc: DoclingDocument = to_docling_document(glm_doc)  # Experimental

        # DEBUG code:
-        def draw_clusters_and_cells(ds_document, page_no):
+        def draw_clusters_and_cells(ds_document, page_no, show: bool = False):
            clusters_to_draw = []
            image = copy.deepcopy(conv_res.pages[page_no].image)
            for ix, elem in enumerate(ds_document.main_text):
                if isinstance(elem, BaseText):
-                    prov = elem.prov[0]
+                    prov = elem.prov[0]  # type: ignore
                elif isinstance(elem, Ref):
                    _, arr, index = elem.ref.split("/")
-                    index = int(index)
+                    index = int(index)  # type: ignore
                    if arr == "tables":
                        prov = ds_document.tables[index].prov[0]
                    elif arr == "figures":
@ -258,7 +260,7 @@ class GlmModel:
                            id=ix,
                            label=elem.name,
                            bbox=BoundingBox.from_tuple(
-                                coord=prov.bbox,
+                                coord=prov.bbox,  # type: ignore
                                origin=CoordOrigin.BOTTOMLEFT,
                            ).to_top_left_origin(conv_res.pages[page_no].size.height),
                        )
@ -278,9 +280,21 @@ class GlmModel:
                for tc in c.cells:  # [:1]:
                    x0, y0, x1, y1 = tc.bbox.as_tuple()
                    draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
-            image.show()

-        # draw_clusters_and_cells(ds_doc, 0)
-        # draw_clusters_and_cells(exported_doc, 0)
+            if show:
+                image.show()
+            else:
+                out_path: Path = (
+                    Path(settings.debug.debug_output_path)
+                    / f"debug_{conv_res.input.file.stem}"
+                )
+                out_path.mkdir(parents=True, exist_ok=True)
+
+                out_file = out_path / f"doc_page_{page_no:05}.png"
+                image.save(str(out_file), format="png")
+
+        # for item in ds_doc.page_dimensions:
+        #    page_no = item.page
+        #    draw_clusters_and_cells(ds_doc, page_no)

        return docling_doc
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@ -96,6 +96,6 @@ class EasyOcrModel(BaseOcrModel):

                # DEBUG code:
                if settings.debug.visualize_ocr:
-                    self.draw_ocr_rects_and_cells(page, ocr_rects)
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@ -324,7 +324,7 @@ class LayoutModel(BasePageModel):
                    # clusters = self.sort_clusters_by_cell_order(clusters)

                    # DEBUG code:
-                    def draw_clusters_and_cells(show: bool = True):
+                    def draw_clusters_and_cells(show: bool = False):
                        image = copy.deepcopy(page.image)
                        if image is not None:
                            draw = ImageDraw.Draw(image)
@ -344,6 +344,17 @@ class LayoutModel(BasePageModel):
                                    )
                            if show:
                                image.show()
+                            else:
+                                out_path: Path = (
+                                    Path(settings.debug.debug_output_path)
+                                    / f"debug_{conv_res.input.file.stem}"
+                                )
+                                out_path.mkdir(parents=True, exist_ok=True)
+
+                                out_file = (
+                                    out_path / f"layout_page_{page.page_no:05}.png"
+                                )
+                                image.save(str(out_file), format="png")

                    # draw_clusters_and_cells()

--- a/docling/models/page_preprocessing_model.py
+++ b/docling/models/page_preprocessing_model.py
@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Iterable, Optional

 from PIL import ImageDraw
@ -5,6 +6,7 @@ from pydantic import BaseModel

 from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 from docling.utils.profiling import TimeRecorder

@ -27,7 +29,7 @@ class PagePreprocessingModel(BasePageModel):
            else:
                with TimeRecorder(conv_res, "page_parse"):
                    page = self._populate_page_images(page)
-                    page = self._parse_page_cells(page)
+                    page = self._parse_page_cells(conv_res, page)
                yield page

    # Generate the page image and store it in the page object
@ -48,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
        return page

    # Extract and populate the page cells and store it in the page object
-    def _parse_page_cells(self, page: Page) -> Page:
+    def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
        assert page._backend is not None

        page.cells = list(page._backend.get_text_cells())

        # DEBUG code:
-        def draw_text_boxes(image, cells):
+        def draw_text_boxes(image, cells, show: bool = False):
            draw = ImageDraw.Draw(image)
            for c in cells:
                x0, y0, x1, y1 = c.bbox.as_tuple()
                draw.rectangle([(x0, y0), (x1, y1)], outline="red")
-            image.show()
+            if show:
+                image.show()
+            else:
+                out_path: Path = (
+                    Path(settings.debug.debug_output_path)
+                    / f"debug_{conv_res.input.file.stem}"
+                )
+                out_path.mkdir(parents=True, exist_ok=True)

-        # draw_text_boxes(page.get_image(scale=1.0), cells)
+                out_file = out_path / f"cells_page_{page.page_no:05}.png"
+                image.save(str(out_file), format="png")
+
+        if settings.debug.visualize_cells:
+            draw_text_boxes(page.get_image(scale=1.0), page.cells)

        return page
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@ -38,7 +38,13 @@ class TableStructureModel(BasePageModel):
            self.tf_predictor = TFPredictor(self.tm_config)
            self.scale = 2.0  # Scale up table input images to 144 dpi

-    def draw_table_and_cells(self, page: Page, tbl_list: Iterable[Table]):
+    def draw_table_and_cells(
+        self,
+        conv_res: ConversionResult,
+        page: Page,
+        tbl_list: Iterable[Table],
+        show: bool = False,
+    ):
        assert page._backend is not None

        image = (
@ -64,7 +70,17 @@ class TableStructureModel(BasePageModel):
                        fill="black",
                    )

-        image.show()
+        if show:
+            image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
+            )
+            out_path.mkdir(parents=True, exist_ok=True)
+
+            out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
+            image.save(str(out_file), format="png")

    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
@ -182,7 +198,9 @@ class TableStructureModel(BasePageModel):
                    # For debugging purposes:
                    if settings.debug.visualize_tables:
                        self.draw_table_and_cells(
-                            page, page.predictions.tablestructure.table_map.values()
+                            conv_res,
+                            page,
+                            page.predictions.tablestructure.table_map.values(),
                        )

                yield page
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@ -179,6 +179,6 @@ class TesseractOcrCliModel(BaseOcrModel):

                # DEBUG code:
                if settings.debug.visualize_ocr:
-                    self.draw_ocr_rects_and_cells(page, ocr_rects)
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page
--- a/docling/models/tesseract_ocr_model.py
+++ b/docling/models/tesseract_ocr_model.py
@ -137,6 +137,6 @@ class TesseractOcrModel(BaseOcrModel):

                # DEBUG code:
                if settings.debug.visualize_ocr:
-                    self.draw_ocr_rects_and_cells(page, ocr_rects)
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page