feat: Add pipeline timings and toggle visualization, establish debug settings (#183)

* Add settings to turn visualization on or off
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add profiling code to all models
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Refactor and fix profiling code
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Visualization code outputs PNG to debug dir
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fixes for time logging
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Optimize imports
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update lockfile
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add start_timestamps to ProfilingItem
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-18 09:31:02 +00:00 · 2024-10-30 15:04:19 +01:00
parent 94a5290789
commit 2a2c65bf4f
23 changed files with 998 additions and 771 deletions
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@@ -4,11 +4,14 @@ from typing import Any, Iterable
 from docling_core.types.doc import DoclingDocument, NodeItem

 from docling.datamodel.base_models import Page
+from docling.datamodel.document import ConversionResult


 class BasePageModel(ABC):
    @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
        pass


--- a/docling/models/base_ocr_model.py
+++ b/docling/models/base_ocr_model.py
@@ -1,6 +1,7 @@
 import copy
 import logging
 from abc import abstractmethod
+from pathlib import Path
 from typing import Iterable, List

 import numpy as np
@@ -10,12 +11,15 @@ from rtree import index
 from scipy.ndimage import find_objects, label

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import OcrOptions
+from docling.datamodel.settings import settings
+from docling.models.base_model import BasePageModel

 _log = logging.getLogger(__name__)


-class BaseOcrModel:
+class BaseOcrModel(BasePageModel):
    def __init__(self, enabled: bool, options: OcrOptions):
        self.enabled = enabled
        self.options = options
@@ -113,7 +117,7 @@ class BaseOcrModel:
        ]
        return filtered_ocr_cells

-    def draw_ocr_rects_and_cells(self, page, ocr_rects):
+    def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
        image = copy.deepcopy(page.image)
        draw = ImageDraw.Draw(image, "RGBA")

@@ -130,8 +134,21 @@ class BaseOcrModel:
            if isinstance(tc, OcrCell):
                color = "magenta"
            draw.rectangle([(x0, y0), (x1, y1)], outline=color)
-        image.show()
+
+        if show:
+            image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
+            )
+            out_path.mkdir(parents=True, exist_ok=True)
+
+            out_file = out_path / f"ocr_page_{page.page_no:05}.png"
+            image.save(str(out_file), format="png")

    @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
        pass
--- a/docling/models/ds_glm_model.py
+++ b/docling/models/ds_glm_model.py
@@ -1,5 +1,6 @@
 import copy
 import random
+from pathlib import Path
 from typing import List, Union

 from deepsearch_glm.nlp_utils import init_nlp_model
@@ -27,6 +28,8 @@ from pydantic import BaseModel, ConfigDict

 from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
 from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
+from docling.datamodel.settings import settings
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import create_hash


@@ -226,23 +229,24 @@ class GlmModel:
        return ds_doc

    def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
-        ds_doc = self._to_legacy_document(conv_res)
-        ds_doc_dict = ds_doc.model_dump(by_alias=True)
+        with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
+            ds_doc = self._to_legacy_document(conv_res)
+            ds_doc_dict = ds_doc.model_dump(by_alias=True)

-        glm_doc = self.model.apply_on_doc(ds_doc_dict)
+            glm_doc = self.model.apply_on_doc(ds_doc_dict)

-        docling_doc: DoclingDocument = to_docling_document(glm_doc)  # Experimental
+            docling_doc: DoclingDocument = to_docling_document(glm_doc)  # Experimental

        # DEBUG code:
-        def draw_clusters_and_cells(ds_document, page_no):
+        def draw_clusters_and_cells(ds_document, page_no, show: bool = False):
            clusters_to_draw = []
            image = copy.deepcopy(conv_res.pages[page_no].image)
            for ix, elem in enumerate(ds_document.main_text):
                if isinstance(elem, BaseText):
-                    prov = elem.prov[0]
+                    prov = elem.prov[0]  # type: ignore
                elif isinstance(elem, Ref):
                    _, arr, index = elem.ref.split("/")
-                    index = int(index)
+                    index = int(index)  # type: ignore
                    if arr == "tables":
                        prov = ds_document.tables[index].prov[0]
                    elif arr == "figures":
@@ -256,7 +260,7 @@ class GlmModel:
                            id=ix,
                            label=elem.name,
                            bbox=BoundingBox.from_tuple(
-                                coord=prov.bbox,
+                                coord=prov.bbox,  # type: ignore
                                origin=CoordOrigin.BOTTOMLEFT,
                            ).to_top_left_origin(conv_res.pages[page_no].size.height),
                        )
@@ -276,9 +280,21 @@ class GlmModel:
                for tc in c.cells:  # [:1]:
                    x0, y0, x1, y1 = tc.bbox.as_tuple()
                    draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
-            image.show()

-        # draw_clusters_and_cells(ds_doc, 0)
-        # draw_clusters_and_cells(exported_doc, 0)
+            if show:
+                image.show()
+            else:
+                out_path: Path = (
+                    Path(settings.debug.debug_output_path)
+                    / f"debug_{conv_res.input.file.stem}"
+                )
+                out_path.mkdir(parents=True, exist_ok=True)
+
+                out_file = out_path / f"doc_page_{page_no:05}.png"
+                image.save(str(out_file), format="png")
+
+        # for item in ds_doc.page_dimensions:
+        #    page_no = item.page
+        #    draw_clusters_and_cells(ds_doc, page_no)

        return docling_doc
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@@ -5,8 +5,11 @@ import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import EasyOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)

@@ -33,58 +36,65 @@ class EasyOcrModel(BaseOcrModel):
                download_enabled=self.options.download_enabled,
            )

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:

        if not self.enabled:
            yield from page_batch
            return

        for page in page_batch:
+
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                ocr_rects = self.get_ocr_rects(page)
+                with TimeRecorder(conv_res, "ocr"):
+                    ocr_rects = self.get_ocr_rects(page)

-                all_ocr_cells = []
-                for ocr_rect in ocr_rects:
-                    # Skip zero area boxes
-                    if ocr_rect.area() == 0:
-                        continue
-                    high_res_image = page._backend.get_page_image(
-                        scale=self.scale, cropbox=ocr_rect
-                    )
-                    im = numpy.array(high_res_image)
-                    result = self.reader.readtext(im)
-
-                    del high_res_image
-                    del im
-
-                    cells = [
-                        OcrCell(
-                            id=ix,
-                            text=line[1],
-                            confidence=line[2],
-                            bbox=BoundingBox.from_tuple(
-                                coord=(
-                                    (line[0][0][0] / self.scale) + ocr_rect.l,
-                                    (line[0][0][1] / self.scale) + ocr_rect.t,
-                                    (line[0][2][0] / self.scale) + ocr_rect.l,
-                                    (line[0][2][1] / self.scale) + ocr_rect.t,
-                                ),
-                                origin=CoordOrigin.TOPLEFT,
-                            ),
+                    all_ocr_cells = []
+                    for ocr_rect in ocr_rects:
+                        # Skip zero area boxes
+                        if ocr_rect.area() == 0:
+                            continue
+                        high_res_image = page._backend.get_page_image(
+                            scale=self.scale, cropbox=ocr_rect
                        )
-                        for ix, line in enumerate(result)
-                    ]
-                    all_ocr_cells.extend(cells)
+                        im = numpy.array(high_res_image)
+                        result = self.reader.readtext(im)

-                ## Remove OCR cells which overlap with programmatic cells.
-                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
+                        del high_res_image
+                        del im

-                page.cells.extend(filtered_ocr_cells)
+                        cells = [
+                            OcrCell(
+                                id=ix,
+                                text=line[1],
+                                confidence=line[2],
+                                bbox=BoundingBox.from_tuple(
+                                    coord=(
+                                        (line[0][0][0] / self.scale) + ocr_rect.l,
+                                        (line[0][0][1] / self.scale) + ocr_rect.t,
+                                        (line[0][2][0] / self.scale) + ocr_rect.l,
+                                        (line[0][2][1] / self.scale) + ocr_rect.t,
+                                    ),
+                                    origin=CoordOrigin.TOPLEFT,
+                                ),
+                            )
+                            for ix, line in enumerate(result)
+                        ]
+                        all_ocr_cells.extend(cells)
+
+                    ## Remove OCR cells which overlap with programmatic cells.
+                    filtered_ocr_cells = self.filter_ocr_cells(
+                        all_ocr_cells, page.cells
+                    )
+
+                    page.cells.extend(filtered_ocr_cells)

                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@@ -16,8 +16,11 @@ from docling.datamodel.base_models import (
    LayoutPrediction,
    Page,
 )
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 from docling.utils import layout_utils as lu
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)

@@ -271,74 +274,97 @@ class LayoutModel(BasePageModel):

        return clusters_out_new, cells_out_new

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
+
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                assert page.size is not None
+                with TimeRecorder(conv_res, "layout"):
+                    assert page.size is not None

-                clusters = []
-                for ix, pred_item in enumerate(
-                    self.layout_predictor.predict(page.get_image(scale=1.0))
-                ):
-                    label = DocItemLabel(
-                        pred_item["label"].lower().replace(" ", "_").replace("-", "_")
-                    )  # Temporary, until docling-ibm-model uses docling-core types
-                    cluster = Cluster(
-                        id=ix,
-                        label=label,
-                        confidence=pred_item["confidence"],
-                        bbox=BoundingBox.model_validate(pred_item),
-                        cells=[],
-                    )
-                    clusters.append(cluster)
-
-                # Map cells to clusters
-                # TODO: Remove, postprocess should take care of it anyway.
-                for cell in page.cells:
-                    for cluster in clusters:
-                        if not cell.bbox.area() > 0:
-                            overlap_frac = 0.0
-                        else:
-                            overlap_frac = (
-                                cell.bbox.intersection_area_with(cluster.bbox)
-                                / cell.bbox.area()
-                            )
-
-                        if overlap_frac > 0.5:
-                            cluster.cells.append(cell)
-
-                # Pre-sort clusters
-                # clusters = self.sort_clusters_by_cell_order(clusters)
-
-                # DEBUG code:
-                def draw_clusters_and_cells():
-                    image = copy.deepcopy(page.image)
-                    draw = ImageDraw.Draw(image)
-                    for c in clusters:
-                        x0, y0, x1, y1 = c.bbox.as_tuple()
-                        draw.rectangle([(x0, y0), (x1, y1)], outline="green")
-
-                        cell_color = (
-                            random.randint(30, 140),
-                            random.randint(30, 140),
-                            random.randint(30, 140),
+                    clusters = []
+                    for ix, pred_item in enumerate(
+                        self.layout_predictor.predict(page.get_image(scale=1.0))
+                    ):
+                        label = DocItemLabel(
+                            pred_item["label"]
+                            .lower()
+                            .replace(" ", "_")
+                            .replace("-", "_")
+                        )  # Temporary, until docling-ibm-model uses docling-core types
+                        cluster = Cluster(
+                            id=ix,
+                            label=label,
+                            confidence=pred_item["confidence"],
+                            bbox=BoundingBox.model_validate(pred_item),
+                            cells=[],
                        )
-                        for tc in c.cells:  # [:1]:
-                            x0, y0, x1, y1 = tc.bbox.as_tuple()
-                            draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
-                    image.show()
+                        clusters.append(cluster)

-                # draw_clusters_and_cells()
+                    # Map cells to clusters
+                    # TODO: Remove, postprocess should take care of it anyway.
+                    for cell in page.cells:
+                        for cluster in clusters:
+                            if not cell.bbox.area() > 0:
+                                overlap_frac = 0.0
+                            else:
+                                overlap_frac = (
+                                    cell.bbox.intersection_area_with(cluster.bbox)
+                                    / cell.bbox.area()
+                                )

-                clusters, page.cells = self.postprocess(
-                    clusters, page.cells, page.size.height
-                )
+                            if overlap_frac > 0.5:
+                                cluster.cells.append(cell)

-                # draw_clusters_and_cells()
+                    # Pre-sort clusters
+                    # clusters = self.sort_clusters_by_cell_order(clusters)

-                page.predictions.layout = LayoutPrediction(clusters=clusters)
+                    # DEBUG code:
+                    def draw_clusters_and_cells(show: bool = False):
+                        image = copy.deepcopy(page.image)
+                        if image is not None:
+                            draw = ImageDraw.Draw(image)
+                            for c in clusters:
+                                x0, y0, x1, y1 = c.bbox.as_tuple()
+                                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
+
+                                cell_color = (
+                                    random.randint(30, 140),
+                                    random.randint(30, 140),
+                                    random.randint(30, 140),
+                                )
+                                for tc in c.cells:  # [:1]:
+                                    x0, y0, x1, y1 = tc.bbox.as_tuple()
+                                    draw.rectangle(
+                                        [(x0, y0), (x1, y1)], outline=cell_color
+                                    )
+                            if show:
+                                image.show()
+                            else:
+                                out_path: Path = (
+                                    Path(settings.debug.debug_output_path)
+                                    / f"debug_{conv_res.input.file.stem}"
+                                )
+                                out_path.mkdir(parents=True, exist_ok=True)
+
+                                out_file = (
+                                    out_path / f"layout_page_{page.page_no:05}.png"
+                                )
+                                image.save(str(out_file), format="png")
+
+                    # draw_clusters_and_cells()
+
+                    clusters, page.cells = self.postprocess(
+                        clusters, page.cells, page.size.height
+                    )
+
+                    page.predictions.layout = LayoutPrediction(clusters=clusters)
+
+                if settings.debug.visualize_layout:
+                    draw_clusters_and_cells()

                yield page
--- a/docling/models/page_assemble_model.py
+++ b/docling/models/page_assemble_model.py
@@ -12,8 +12,10 @@ from docling.datamodel.base_models import (
    Table,
    TextElement,
 )
+from docling.datamodel.document import ConversionResult
 from docling.models.base_model import BasePageModel
 from docling.models.layout_model import LayoutModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)

@@ -51,122 +53,122 @@ class PageAssembleModel(BasePageModel):

        return sanitized_text.strip()  # Strip any leading or trailing whitespace

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                assert page.predictions.layout is not None
+                with TimeRecorder(conv_res, "page_assemble"):

-                # assembles some JSON output page by page.
+                    assert page.predictions.layout is not None

-                elements: List[PageElement] = []
-                headers: List[PageElement] = []
-                body: List[PageElement] = []
+                    # assembles some JSON output page by page.

-                for cluster in page.predictions.layout.clusters:
-                    # _log.info("Cluster label seen:", cluster.label)
-                    if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
+                    elements: List[PageElement] = []
+                    headers: List[PageElement] = []
+                    body: List[PageElement] = []

-                        textlines = [
-                            cell.text.replace("\x02", "-").strip()
-                            for cell in cluster.cells
-                            if len(cell.text.strip()) > 0
-                        ]
-                        text = self.sanitize_text(textlines)
-                        text_el = TextElement(
-                            label=cluster.label,
-                            id=cluster.id,
-                            text=text,
-                            page_no=page.page_no,
-                            cluster=cluster,
-                        )
-                        elements.append(text_el)
+                    for cluster in page.predictions.layout.clusters:
+                        # _log.info("Cluster label seen:", cluster.label)
+                        if cluster.label in LayoutModel.TEXT_ELEM_LABELS:

-                        if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
-                            headers.append(text_el)
-                        else:
-                            body.append(text_el)
-                    elif cluster.label == LayoutModel.TABLE_LABEL:
-                        tbl = None
-                        if page.predictions.tablestructure:
-                            tbl = page.predictions.tablestructure.table_map.get(
-                                cluster.id, None
-                            )
-                        if (
-                            not tbl
-                        ):  # fallback: add table without structure, if it isn't present
-                            tbl = Table(
+                            textlines = [
+                                cell.text.replace("\x02", "-").strip()
+                                for cell in cluster.cells
+                                if len(cell.text.strip()) > 0
+                            ]
+                            text = self.sanitize_text(textlines)
+                            text_el = TextElement(
                                label=cluster.label,
                                id=cluster.id,
-                                text="",
-                                otsl_seq=[],
-                                table_cells=[],
-                                cluster=cluster,
-                                page_no=page.page_no,
-                            )
-
-                        elements.append(tbl)
-                        body.append(tbl)
-                    elif cluster.label == LayoutModel.FIGURE_LABEL:
-                        fig = None
-                        if page.predictions.figures_classification:
-                            fig = (
-                                page.predictions.figures_classification.figure_map.get(
-                                    cluster.id, None
-                                )
-                            )
-                        if (
-                            not fig
-                        ):  # fallback: add figure without classification, if it isn't present
-                            fig = FigureElement(
-                                label=cluster.label,
-                                id=cluster.id,
-                                text="",
-                                data=None,
-                                cluster=cluster,
-                                page_no=page.page_no,
-                            )
-                        elements.append(fig)
-                        body.append(fig)
-                    elif cluster.label == LayoutModel.FORMULA_LABEL:
-                        equation = None
-                        if page.predictions.equations_prediction:
-                            equation = (
-                                page.predictions.equations_prediction.equation_map.get(
-                                    cluster.id, None
-                                )
-                            )
-                        if (
-                            not equation
-                        ):  # fallback: add empty formula, if it isn't present
-                            text = self.sanitize_text(
-                                [
-                                    cell.text.replace("\x02", "-").strip()
-                                    for cell in cluster.cells
-                                    if len(cell.text.strip()) > 0
-                                ]
-                            )
-                            equation = TextElement(
-                                label=cluster.label,
-                                id=cluster.id,
-                                cluster=cluster,
-                                page_no=page.page_no,
                                text=text,
+                                page_no=page.page_no,
+                                cluster=cluster,
                            )
-                        elements.append(equation)
-                        body.append(equation)
+                            elements.append(text_el)

-                page.assembled = AssembledUnit(
-                    elements=elements, headers=headers, body=body
-                )
+                            if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
+                                headers.append(text_el)
+                            else:
+                                body.append(text_el)
+                        elif cluster.label == LayoutModel.TABLE_LABEL:
+                            tbl = None
+                            if page.predictions.tablestructure:
+                                tbl = page.predictions.tablestructure.table_map.get(
+                                    cluster.id, None
+                                )
+                            if (
+                                not tbl
+                            ):  # fallback: add table without structure, if it isn't present
+                                tbl = Table(
+                                    label=cluster.label,
+                                    id=cluster.id,
+                                    text="",
+                                    otsl_seq=[],
+                                    table_cells=[],
+                                    cluster=cluster,
+                                    page_no=page.page_no,
+                                )

-                # Remove page images (can be disabled)
-                if not self.options.keep_images:
-                    page._image_cache = {}
+                            elements.append(tbl)
+                            body.append(tbl)
+                        elif cluster.label == LayoutModel.FIGURE_LABEL:
+                            fig = None
+                            if page.predictions.figures_classification:
+                                fig = page.predictions.figures_classification.figure_map.get(
+                                    cluster.id, None
+                                )
+                            if (
+                                not fig
+                            ):  # fallback: add figure without classification, if it isn't present
+                                fig = FigureElement(
+                                    label=cluster.label,
+                                    id=cluster.id,
+                                    text="",
+                                    data=None,
+                                    cluster=cluster,
+                                    page_no=page.page_no,
+                                )
+                            elements.append(fig)
+                            body.append(fig)
+                        elif cluster.label == LayoutModel.FORMULA_LABEL:
+                            equation = None
+                            if page.predictions.equations_prediction:
+                                equation = page.predictions.equations_prediction.equation_map.get(
+                                    cluster.id, None
+                                )
+                            if (
+                                not equation
+                            ):  # fallback: add empty formula, if it isn't present
+                                text = self.sanitize_text(
+                                    [
+                                        cell.text.replace("\x02", "-").strip()
+                                        for cell in cluster.cells
+                                        if len(cell.text.strip()) > 0
+                                    ]
+                                )
+                                equation = TextElement(
+                                    label=cluster.label,
+                                    id=cluster.id,
+                                    cluster=cluster,
+                                    page_no=page.page_no,
+                                    text=text,
+                                )
+                            elements.append(equation)
+                            body.append(equation)

-                # Unload backend
-                page._backend.unload()
+                    page.assembled = AssembledUnit(
+                        elements=elements, headers=headers, body=body
+                    )
+
+                    # Remove page images (can be disabled)
+                    if not self.options.keep_images:
+                        page._image_cache = {}
+
+                    # Unload backend
+                    page._backend.unload()

                yield page
--- a/docling/models/page_preprocessing_model.py
+++ b/docling/models/page_preprocessing_model.py
@@ -1,10 +1,14 @@
+from pathlib import Path
 from typing import Iterable, Optional

 from PIL import ImageDraw
 from pydantic import BaseModel

 from docling.datamodel.base_models import Page
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.profiling import TimeRecorder


 class PagePreprocessingOptions(BaseModel):
@@ -15,14 +19,17 @@ class PagePreprocessingModel(BasePageModel):
    def __init__(self, options: PagePreprocessingOptions):
        self.options = options

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                page = self._populate_page_images(page)
-                page = self._parse_page_cells(page)
+                with TimeRecorder(conv_res, "page_parse"):
+                    page = self._populate_page_images(page)
+                    page = self._parse_page_cells(conv_res, page)
                yield page

    # Generate the page image and store it in the page object
@@ -43,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
        return page

    # Extract and populate the page cells and store it in the page object
-    def _parse_page_cells(self, page: Page) -> Page:
+    def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
        assert page._backend is not None

        page.cells = list(page._backend.get_text_cells())

        # DEBUG code:
-        def draw_text_boxes(image, cells):
+        def draw_text_boxes(image, cells, show: bool = False):
            draw = ImageDraw.Draw(image)
            for c in cells:
                x0, y0, x1, y1 = c.bbox.as_tuple()
                draw.rectangle([(x0, y0), (x1, y1)], outline="red")
-            image.show()
+            if show:
+                image.show()
+            else:
+                out_path: Path = (
+                    Path(settings.debug.debug_output_path)
+                    / f"debug_{conv_res.input.file.stem}"
+                )
+                out_path.mkdir(parents=True, exist_ok=True)

-        # draw_text_boxes(page.get_image(scale=1.0), cells)
+                out_file = out_path / f"cells_page_{page.page_no:05}.png"
+                image.save(str(out_file), format="png")
+
+        if settings.debug.visualize_cells:
+            draw_text_boxes(page.get_image(scale=1.0), page.cells)

        return page
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@@ -1,6 +1,6 @@
 import copy
 from pathlib import Path
-from typing import Iterable, List
+from typing import Iterable

 import numpy
 from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
@@ -8,8 +8,11 @@ from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredic
 from PIL import ImageDraw

 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.profiling import TimeRecorder


 class TableStructureModel(BasePageModel):
@@ -35,7 +38,13 @@ class TableStructureModel(BasePageModel):
            self.tf_predictor = TFPredictor(self.tm_config)
            self.scale = 2.0  # Scale up table input images to 144 dpi

-    def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
+    def draw_table_and_cells(
+        self,
+        conv_res: ConversionResult,
+        page: Page,
+        tbl_list: Iterable[Table],
+        show: bool = False,
+    ):
        assert page._backend is not None

        image = (
@@ -61,9 +70,21 @@ class TableStructureModel(BasePageModel):
                        fill="black",
                    )

-        image.show()
+        if show:
+            image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
+            )
+            out_path.mkdir(parents=True, exist_ok=True)

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+            out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
+            image.save(str(out_file), format="png")
+
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:

        if not self.enabled:
            yield from page_batch
@@ -74,98 +95,112 @@ class TableStructureModel(BasePageModel):
            if not page._backend.is_valid():
                yield page
            else:
+                with TimeRecorder(conv_res, "table_structure"):

-                assert page.predictions.layout is not None
-                assert page.size is not None
+                    assert page.predictions.layout is not None
+                    assert page.size is not None

-                page.predictions.tablestructure = TableStructurePrediction()  # dummy
+                    page.predictions.tablestructure = (
+                        TableStructurePrediction()
+                    )  # dummy

-                in_tables = [
-                    (
-                        cluster,
-                        [
-                            round(cluster.bbox.l) * self.scale,
-                            round(cluster.bbox.t) * self.scale,
-                            round(cluster.bbox.r) * self.scale,
-                            round(cluster.bbox.b) * self.scale,
-                        ],
+                    in_tables = [
+                        (
+                            cluster,
+                            [
+                                round(cluster.bbox.l) * self.scale,
+                                round(cluster.bbox.t) * self.scale,
+                                round(cluster.bbox.r) * self.scale,
+                                round(cluster.bbox.b) * self.scale,
+                            ],
+                        )
+                        for cluster in page.predictions.layout.clusters
+                        if cluster.label == DocItemLabel.TABLE
+                    ]
+                    if not len(in_tables):
+                        yield page
+                        continue
+
+                    tokens = []
+                    for c in page.cells:
+                        for cluster, _ in in_tables:
+                            if c.bbox.area() > 0:
+                                if (
+                                    c.bbox.intersection_area_with(cluster.bbox)
+                                    / c.bbox.area()
+                                    > 0.2
+                                ):
+                                    # Only allow non-empty strings (not just whitespace) into the cells of a table
+                                    if len(c.text.strip()) > 0:
+                                        new_cell = copy.deepcopy(c)
+                                        new_cell.bbox = new_cell.bbox.scaled(
+                                            scale=self.scale
+                                        )
+
+                                        tokens.append(new_cell.model_dump())
+
+                    page_input = {
+                        "tokens": tokens,
+                        "width": page.size.width * self.scale,
+                        "height": page.size.height * self.scale,
+                    }
+                    page_input["image"] = numpy.asarray(
+                        page.get_image(scale=self.scale)
                    )
-                    for cluster in page.predictions.layout.clusters
-                    if cluster.label == DocItemLabel.TABLE
-                ]
-                if not len(in_tables):
-                    yield page
-                    continue

-                tokens = []
-                for c in page.cells:
-                    for cluster, _ in in_tables:
-                        if c.bbox.area() > 0:
-                            if (
-                                c.bbox.intersection_area_with(cluster.bbox)
-                                / c.bbox.area()
-                                > 0.2
-                            ):
-                                # Only allow non empty stings (spaces) into the cells of a table
-                                if len(c.text.strip()) > 0:
-                                    new_cell = copy.deepcopy(c)
-                                    new_cell.bbox = new_cell.bbox.scaled(
-                                        scale=self.scale
+                    table_clusters, table_bboxes = zip(*in_tables)
+
+                    if len(table_bboxes):
+                        tf_output = self.tf_predictor.multi_table_predict(
+                            page_input, table_bboxes, do_matching=self.do_cell_matching
+                        )
+
+                        for table_cluster, table_out in zip(table_clusters, tf_output):
+                            table_cells = []
+                            for element in table_out["tf_responses"]:
+
+                                if not self.do_cell_matching:
+                                    the_bbox = BoundingBox.model_validate(
+                                        element["bbox"]
+                                    ).scaled(1 / self.scale)
+                                    text_piece = page._backend.get_text_in_rect(
+                                        the_bbox
                                    )
+                                    element["bbox"]["token"] = text_piece

-                                    tokens.append(new_cell.model_dump())
+                                tc = TableCell.model_validate(element)
+                                if self.do_cell_matching and tc.bbox is not None:
+                                    tc.bbox = tc.bbox.scaled(1 / self.scale)
+                                table_cells.append(tc)

-                page_input = {
-                    "tokens": tokens,
-                    "width": page.size.width * self.scale,
-                    "height": page.size.height * self.scale,
-                }
-                page_input["image"] = numpy.asarray(page.get_image(scale=self.scale))
+                            # Retrieving cols/rows, after post processing:
+                            num_rows = table_out["predict_details"]["num_rows"]
+                            num_cols = table_out["predict_details"]["num_cols"]
+                            otsl_seq = table_out["predict_details"]["prediction"][
+                                "rs_seq"
+                            ]

-                table_clusters, table_bboxes = zip(*in_tables)
+                            tbl = Table(
+                                otsl_seq=otsl_seq,
+                                table_cells=table_cells,
+                                num_rows=num_rows,
+                                num_cols=num_cols,
+                                id=table_cluster.id,
+                                page_no=page.page_no,
+                                cluster=table_cluster,
+                                label=DocItemLabel.TABLE,
+                            )

-                if len(table_bboxes):
-                    tf_output = self.tf_predictor.multi_table_predict(
-                        page_input, table_bboxes, do_matching=self.do_cell_matching
-                    )
-
-                    for table_cluster, table_out in zip(table_clusters, tf_output):
-                        table_cells = []
-                        for element in table_out["tf_responses"]:
-
-                            if not self.do_cell_matching:
-                                the_bbox = BoundingBox.model_validate(
-                                    element["bbox"]
-                                ).scaled(1 / self.scale)
-                                text_piece = page._backend.get_text_in_rect(the_bbox)
-                                element["bbox"]["token"] = text_piece
-
-                            tc = TableCell.model_validate(element)
-                            if self.do_cell_matching and tc.bbox is not None:
-                                tc.bbox = tc.bbox.scaled(1 / self.scale)
-                            table_cells.append(tc)
-
-                        # Retrieving cols/rows, after post processing:
-                        num_rows = table_out["predict_details"]["num_rows"]
-                        num_cols = table_out["predict_details"]["num_cols"]
-                        otsl_seq = table_out["predict_details"]["prediction"]["rs_seq"]
-
-                        tbl = Table(
-                            otsl_seq=otsl_seq,
-                            table_cells=table_cells,
-                            num_rows=num_rows,
-                            num_cols=num_cols,
-                            id=table_cluster.id,
-                            page_no=page.page_no,
-                            cluster=table_cluster,
-                            label=DocItemLabel.TABLE,
-                        )
-
-                        page.predictions.tablestructure.table_map[table_cluster.id] = (
-                            tbl
-                        )
+                            page.predictions.tablestructure.table_map[
+                                table_cluster.id
+                            ] = tbl

                    # For debugging purposes:
-                    # self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values())
+                    if settings.debug.visualize_tables:
+                        self.draw_table_and_cells(
+                            conv_res,
+                            page,
+                            page.predictions.tablestructure.table_map.values(),
+                        )

                yield page
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -8,8 +8,11 @@ import pandas as pd
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractCliOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)

@@ -102,7 +105,9 @@ class TesseractOcrCliModel(BaseOcrModel):

        return df_filtered

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:

        if not self.enabled:
            yield from page_batch
@@ -113,62 +118,67 @@ class TesseractOcrCliModel(BaseOcrModel):
            if not page._backend.is_valid():
                yield page
            else:
-                ocr_rects = self.get_ocr_rects(page)
+                with TimeRecorder(conv_res, "ocr"):

-                all_ocr_cells = []
-                for ocr_rect in ocr_rects:
-                    # Skip zero area boxes
-                    if ocr_rect.area() == 0:
-                        continue
-                    high_res_image = page._backend.get_page_image(
-                        scale=self.scale, cropbox=ocr_rect
+                    ocr_rects = self.get_ocr_rects(page)
+
+                    all_ocr_cells = []
+                    for ocr_rect in ocr_rects:
+                        # Skip zero area boxes
+                        if ocr_rect.area() == 0:
+                            continue
+                        high_res_image = page._backend.get_page_image(
+                            scale=self.scale, cropbox=ocr_rect
+                        )
+
+                        with tempfile.NamedTemporaryFile(
+                            suffix=".png", mode="w"
+                        ) as image_file:
+                            fname = image_file.name
+                            high_res_image.save(fname)
+
+                            df = self._run_tesseract(fname)
+
+                        # _log.info(df)
+
+                        # Build an OcrCell from each row (bounding box and text)
+                        for ix, row in df.iterrows():
+                            text = row["text"]
+                            conf = row["conf"]
+
+                            l = float(row["left"])
+                            b = float(row["top"])
+                            w = float(row["width"])
+                            h = float(row["height"])
+
+                            t = b + h
+                            r = l + w
+
+                            cell = OcrCell(
+                                id=ix,
+                                text=text,
+                                confidence=conf / 100.0,
+                                bbox=BoundingBox.from_tuple(
+                                    coord=(
+                                        (l / self.scale) + ocr_rect.l,
+                                        (b / self.scale) + ocr_rect.t,
+                                        (r / self.scale) + ocr_rect.l,
+                                        (t / self.scale) + ocr_rect.t,
+                                    ),
+                                    origin=CoordOrigin.TOPLEFT,
+                                ),
+                            )
+                            all_ocr_cells.append(cell)
+
+                    ## Remove OCR cells which overlap with programmatic cells.
+                    filtered_ocr_cells = self.filter_ocr_cells(
+                        all_ocr_cells, page.cells
                    )

-                    with tempfile.NamedTemporaryFile(
-                        suffix=".png", mode="w"
-                    ) as image_file:
-                        fname = image_file.name
-                        high_res_image.save(fname)
-
-                        df = self._run_tesseract(fname)
-
-                    # _log.info(df)
-
-                    # Print relevant columns (bounding box and text)
-                    for ix, row in df.iterrows():
-                        text = row["text"]
-                        conf = row["conf"]
-
-                        l = float(row["left"])
-                        b = float(row["top"])
-                        w = float(row["width"])
-                        h = float(row["height"])
-
-                        t = b + h
-                        r = l + w
-
-                        cell = OcrCell(
-                            id=ix,
-                            text=text,
-                            confidence=conf / 100.0,
-                            bbox=BoundingBox.from_tuple(
-                                coord=(
-                                    (l / self.scale) + ocr_rect.l,
-                                    (b / self.scale) + ocr_rect.t,
-                                    (r / self.scale) + ocr_rect.l,
-                                    (t / self.scale) + ocr_rect.t,
-                                ),
-                                origin=CoordOrigin.TOPLEFT,
-                            ),
-                        )
-                        all_ocr_cells.append(cell)
-
-                ## Remove OCR cells which overlap with programmatic cells.
-                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
-
-                page.cells.extend(filtered_ocr_cells)
+                    page.cells.extend(filtered_ocr_cells)

                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page
--- a/docling/models/tesseract_ocr_model.py
+++ b/docling/models/tesseract_ocr_model.py
@@ -4,8 +4,11 @@ from typing import Iterable
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)

@@ -61,7 +64,9 @@ class TesseractOcrModel(BaseOcrModel):
            # Finalize the tesseractAPI
            self.reader.End()

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:

        if not self.enabled:
            yield from page_batch
@@ -72,59 +77,66 @@ class TesseractOcrModel(BaseOcrModel):
            if not page._backend.is_valid():
                yield page
            else:
-                assert self.reader is not None
+                with TimeRecorder(conv_res, "ocr"):

-                ocr_rects = self.get_ocr_rects(page)
+                    assert self.reader is not None

-                all_ocr_cells = []
-                for ocr_rect in ocr_rects:
-                    # Skip zero area boxes
-                    if ocr_rect.area() == 0:
-                        continue
-                    high_res_image = page._backend.get_page_image(
-                        scale=self.scale, cropbox=ocr_rect
-                    )
+                    ocr_rects = self.get_ocr_rects(page)

-                    # Retrieve text snippets with their bounding boxes
-                    self.reader.SetImage(high_res_image)
-                    boxes = self.reader.GetComponentImages(
-                        self.reader_RIL.TEXTLINE, True
-                    )
-
-                    cells = []
-                    for ix, (im, box, _, _) in enumerate(boxes):
-                        # Set the area of interest. Tesseract uses Bottom-Left for the origin
-                        self.reader.SetRectangle(box["x"], box["y"], box["w"], box["h"])
-
-                        # Extract text within the bounding box
-                        text = self.reader.GetUTF8Text().strip()
-                        confidence = self.reader.MeanTextConf()
-                        left = box["x"] / self.scale
-                        bottom = box["y"] / self.scale
-                        right = (box["x"] + box["w"]) / self.scale
-                        top = (box["y"] + box["h"]) / self.scale
-
-                        cells.append(
-                            OcrCell(
-                                id=ix,
-                                text=text,
-                                confidence=confidence,
-                                bbox=BoundingBox.from_tuple(
-                                    coord=(left, top, right, bottom),
-                                    origin=CoordOrigin.TOPLEFT,
-                                ),
-                            )
+                    all_ocr_cells = []
+                    for ocr_rect in ocr_rects:
+                        # Skip zero area boxes
+                        if ocr_rect.area() == 0:
+                            continue
+                        high_res_image = page._backend.get_page_image(
+                            scale=self.scale, cropbox=ocr_rect
                        )

-                    # del high_res_image
-                    all_ocr_cells.extend(cells)
+                        # Retrieve text snippets with their bounding boxes
+                        self.reader.SetImage(high_res_image)
+                        boxes = self.reader.GetComponentImages(
+                            self.reader_RIL.TEXTLINE, True
+                        )

-                ## Remove OCR cells which overlap with programmatic cells.
-                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
+                        cells = []
+                        for ix, (im, box, _, _) in enumerate(boxes):
+                            # Set the area of interest. Tesseract uses Bottom-Left for the origin
+                            self.reader.SetRectangle(
+                                box["x"], box["y"], box["w"], box["h"]
+                            )

-                page.cells.extend(filtered_ocr_cells)
+                            # Extract text within the bounding box
+                            text = self.reader.GetUTF8Text().strip()
+                            confidence = self.reader.MeanTextConf()
+                            left = box["x"] / self.scale
+                            bottom = box["y"] / self.scale
+                            right = (box["x"] + box["w"]) / self.scale
+                            top = (box["y"] + box["h"]) / self.scale
+
+                            cells.append(
+                                OcrCell(
+                                    id=ix,
+                                    text=text,
+                                    confidence=confidence,
+                                    bbox=BoundingBox.from_tuple(
+                                        coord=(left, top, right, bottom),
+                                        origin=CoordOrigin.TOPLEFT,
+                                    ),
+                                )
+                            )
+
+                        # del high_res_image
+                        all_ocr_cells.extend(cells)
+
+                    ## Remove OCR cells which overlap with programmatic cells.
+                    filtered_ocr_cells = self.filter_ocr_cells(
+                        all_ocr_cells, page.cells
+                    )
+
+                    page.cells.extend(filtered_ocr_cells)

                # DEBUG code:
-                # self.draw_ocr_rects_and_cells(page, ocr_rects)
+                if settings.debug.visualize_ocr:
+                    self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)

                yield page