feat: Add pipeline timings and visualization toggles, establish debug settings (#183)

* Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Refactor and fix profiling code

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Visualization code outputs PNGs to debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fixes for time logging

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Optimize imports

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lockfile

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add start_timestamps to ProfilingItem

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Author: Christoph Auer
Date: 2024-10-30 15:04:19 +01:00
Committed by: GitHub
Parent: 94a5290789
Commit: 2a2c65bf4f
23 changed files with 998 additions and 771 deletions

View File

@@ -1,24 +1,20 @@
import logging
import os
import re
from io import BytesIO
from pathlib import Path
from typing import Set, Union
from docling_core.types.doc import (
DocItem,
DocItemLabel,
DoclingDocument,
DocumentOrigin,
GroupItem,
GroupLabel,
ImageRef,
NodeItem,
Size,
TableCell,
TableData,
)
from pydantic import AnyUrl
from docling.backend.abstract_backend import DeclarativeDocumentBackend
from docling.datamodel.base_models import InputFormat

View File

@@ -1,6 +1,6 @@
 from enum import Enum, auto
 from io import BytesIO
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union

 from docling_core.types.doc import (
     BoundingBox,

View File

@@ -3,7 +3,7 @@ import re
 from enum import Enum
 from io import BytesIO
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Type, Union

 import filetype
 from docling_core.types.doc import (
@@ -52,6 +52,7 @@ from docling.datamodel.base_models import (
     Page,
 )
 from docling.datamodel.settings import DocumentLimits
+from docling.utils.profiling import ProfilingItem
 from docling.utils.utils import create_file_hash, create_hash

 if TYPE_CHECKING:
@@ -187,6 +188,7 @@ class ConversionResult(BaseModel):
     pages: List[Page] = []
     assembled: AssembledUnit = AssembledUnit()
+    timings: Dict[str, ProfilingItem] = {}

     document: DoclingDocument = _EMPTY_DOCLING_DOC
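
Not part of the diff: the new timings field makes the collected profiles part of the conversion result itself. A minimal sketch of reading them back, assuming profiling was switched on beforehand; "example.pdf" stands in for a real input.

    from docling.datamodel.settings import settings
    from docling.document_converter import DocumentConverter

    settings.debug.profile_pipeline_timings = True  # enable collection first

    conv_res = DocumentConverter().convert("example.pdf")  # illustrative input
    for key, item in conv_res.timings.items():
        # Page-scoped items hold one sample per page; document-scoped hold one.
        print(f"{key}: scope={item.scope.value} count={item.count} mean={item.mean():.3f}s")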

View File

@@ -1,4 +1,5 @@
 import sys
+from pathlib import Path

 from pydantic import BaseModel
 from pydantic_settings import BaseSettings
@@ -26,8 +27,21 @@ class BatchConcurrencySettings(BaseModel):
     # To force models into single core: export OMP_NUM_THREADS=1


+class DebugSettings(BaseModel):
+    visualize_cells: bool = False
+    visualize_ocr: bool = False
+    visualize_layout: bool = False
+    visualize_tables: bool = False
+
+    profile_pipeline_timings: bool = False
+
+    # Path used to output debug information.
+    debug_output_path: str = str(Path.cwd() / "debug")
+
+
 class AppSettings(BaseSettings):
     perf: BatchConcurrencySettings
+    debug: DebugSettings


-settings = AppSettings(perf=BatchConcurrencySettings())
+settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())
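
Not part of the diff: a sketch of flipping these toggles at runtime before a conversion. Field names are the ones introduced above; the singleton import path appears throughout this commit.

    from docling.datamodel.settings import settings

    settings.debug.profile_pipeline_timings = True  # record per-stage timings
    settings.debug.visualize_layout = True          # dump layout overlays as PNGs
    settings.debug.visualize_ocr = True             # dump OCR overlays as PNGs
    settings.debug.debug_output_path = "./debug"    # root folder for debug output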

View File

@@ -189,24 +189,35 @@ class DocumentConverter:
     ) -> Iterator[ConversionResult]:
         assert self.format_to_options is not None

+        start_time = time.monotonic()
+
         for input_batch in chunkify(
             conv_input.docs(self.format_to_options),
             settings.perf.doc_batch_size,  # pass format_options
         ):
             _log.info(f"Going to convert document batch...")
+
             # parallel processing only within input_batch
             # with ThreadPoolExecutor(
             #    max_workers=settings.perf.doc_batch_concurrency
             # ) as pool:
             #   yield from pool.map(self.process_document, input_batch)
             # Note: PDF backends are not thread-safe, thread pool usage was disabled.
+
             for item in map(
                 partial(self._process_document, raises_on_error=raises_on_error),
                 input_batch,
             ):
+                elapsed = time.monotonic() - start_time
+                start_time = time.monotonic()
+
                 if item is not None:
+                    _log.info(
+                        f"Finished converting document {item.input.file.name} in {elapsed:.2f} sec."
+                    )
                     yield item
+                else:
+                    _log.info(f"Skipped a document. We lost {elapsed:.2f} sec.")

     def _get_pipeline(self, doc: InputDocument) -> Optional[BasePipeline]:
         assert self.format_to_options is not None
@@ -237,15 +248,8 @@ class DocumentConverter:
         assert self.allowed_formats is not None
         assert in_doc.format in self.allowed_formats

-        start_doc_time = time.time()
         conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)

-        end_doc_time = time.time() - start_doc_time
-        _log.info(
-            f"Finished converting document {in_doc.file.name} in {end_doc_time:.2f} seconds."
-        )
-
         return conv_res

     def _execute_pipeline(
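
Not part of the diff: a sketch of how the per-document log lines above surface when converting a batch lazily, assuming the standard convert_all entry point and illustrative input paths.

    import logging

    from docling.document_converter import DocumentConverter

    logging.basicConfig(level=logging.INFO)  # show the "Finished converting..." lines

    converter = DocumentConverter()
    for result in converter.convert_all(["a.pdf", "b.pdf"], raises_on_error=False):
        print(result.input.file.name, result.status)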

View File

@@ -4,11 +4,14 @@ from typing import Any, Iterable
 from docling_core.types.doc import DoclingDocument, NodeItem

 from docling.datamodel.base_models import Page
+from docling.datamodel.document import ConversionResult


 class BasePageModel(ABC):
     @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
         pass
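
Not part of the diff: every page model in this commit follows the widened interface. A minimal conforming stage (NoOpModel and the "noop" key are illustrative, not from the diff) that times itself against the conversion result:

    from typing import Iterable

    from docling.datamodel.base_models import Page
    from docling.datamodel.document import ConversionResult
    from docling.models.base_model import BasePageModel
    from docling.utils.profiling import TimeRecorder


    class NoOpModel(BasePageModel):
        def __call__(
            self, conv_res: ConversionResult, page_batch: Iterable[Page]
        ) -> Iterable[Page]:
            for page in page_batch:
                with TimeRecorder(conv_res, "noop"):  # key shows up in conv_res.timings
                    pass  # a real model would mutate the page here
                yield page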

View File

@@ -1,6 +1,7 @@
 import copy
 import logging
 from abc import abstractmethod
+from pathlib import Path
 from typing import Iterable, List

 import numpy as np
@@ -10,12 +11,15 @@ from rtree import index
 from scipy.ndimage import find_objects, label

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import OcrOptions
+from docling.datamodel.settings import settings
+from docling.models.base_model import BasePageModel

 _log = logging.getLogger(__name__)


-class BaseOcrModel:
+class BaseOcrModel(BasePageModel):
     def __init__(self, enabled: bool, options: OcrOptions):
         self.enabled = enabled
         self.options = options
@@ -113,7 +117,7 @@ class BaseOcrModel:
         ]
         return filtered_ocr_cells

-    def draw_ocr_rects_and_cells(self, page, ocr_rects):
+    def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
         image = copy.deepcopy(page.image)
         draw = ImageDraw.Draw(image, "RGBA")
@@ -130,8 +134,21 @@ class BaseOcrModel:
             if isinstance(tc, OcrCell):
                 color = "magenta"
             draw.rectangle([(x0, y0), (x1, y1)], outline=color)

-        image.show()
+        if show:
+            image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
+            )
+            out_path.mkdir(parents=True, exist_ok=True)
+
+            out_file = out_path / f"ocr_page_{page.page_no:05}.png"
+            image.save(str(out_file), format="png")

     @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
        pass
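
Not part of the diff: for orientation, the PNG naming scheme above (mirrored by the cell, layout, and table visualizers later in this commit) resolves like this; the stem and page number are illustrative values.

    from pathlib import Path

    debug_root = Path("debug")   # settings.debug.debug_output_path
    stem, page_no = "sample", 3  # conv_res.input.file.stem, page.page_no
    out_file = debug_root / f"debug_{stem}" / f"ocr_page_{page_no:05}.png"
    print(out_file)  # debug/debug_sample/ocr_page_00003.png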

View File

@@ -1,5 +1,6 @@
 import copy
 import random
+from pathlib import Path
 from typing import List, Union

 from deepsearch_glm.nlp_utils import init_nlp_model
@@ -27,6 +28,8 @@ from pydantic import BaseModel, ConfigDict
 from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
 from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
+from docling.datamodel.settings import settings
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import create_hash
@@ -226,23 +229,24 @@ class GlmModel:
return ds_doc
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
ds_doc = self._to_legacy_document(conv_res)
ds_doc_dict = ds_doc.model_dump(by_alias=True)
with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
ds_doc = self._to_legacy_document(conv_res)
ds_doc_dict = ds_doc.model_dump(by_alias=True)
glm_doc = self.model.apply_on_doc(ds_doc_dict)
glm_doc = self.model.apply_on_doc(ds_doc_dict)
docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental
docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental
# DEBUG code:
def draw_clusters_and_cells(ds_document, page_no):
def draw_clusters_and_cells(ds_document, page_no, show: bool = False):
clusters_to_draw = []
image = copy.deepcopy(conv_res.pages[page_no].image)
for ix, elem in enumerate(ds_document.main_text):
if isinstance(elem, BaseText):
prov = elem.prov[0]
prov = elem.prov[0] # type: ignore
elif isinstance(elem, Ref):
_, arr, index = elem.ref.split("/")
index = int(index)
index = int(index) # type: ignore
if arr == "tables":
prov = ds_document.tables[index].prov[0]
elif arr == "figures":
@@ -256,7 +260,7 @@ class GlmModel:
id=ix,
label=elem.name,
bbox=BoundingBox.from_tuple(
coord=prov.bbox,
coord=prov.bbox, # type: ignore
origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(conv_res.pages[page_no].size.height),
)
@@ -276,9 +280,21 @@ class GlmModel:
for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
image.show()
# draw_clusters_and_cells(ds_doc, 0)
# draw_clusters_and_cells(exported_doc, 0)
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = out_path / f"doc_page_{page_no:05}.png"
image.save(str(out_file), format="png")
# for item in ds_doc.page_dimensions:
# page_no = item.page
# draw_clusters_and_cells(ds_doc, page_no)
return docling_doc

View File

@@ -5,8 +5,11 @@ import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import EasyOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@@ -33,58 +36,65 @@ class EasyOcrModel(BaseOcrModel):
download_enabled=self.options.download_enabled,
)
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
return
for page in page_batch:
assert page._backend is not None
if not page._backend.is_valid():
yield page
else:
ocr_rects = self.get_ocr_rects(page)
with TimeRecorder(conv_res, "ocr"):
ocr_rects = self.get_ocr_rects(page)
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
im = numpy.array(high_res_image)
result = self.reader.readtext(im)
del high_res_image
del im
cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
for ix, line in enumerate(result)
]
all_ocr_cells.extend(cells)
im = numpy.array(high_res_image)
result = self.reader.readtext(im)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
del high_res_image
del im
page.cells.extend(filtered_ocr_cells)
cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
for ix, line in enumerate(result)
]
all_ocr_cells.extend(cells)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
)
page.cells.extend(filtered_ocr_cells)
# DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects)
if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page

View File

@@ -16,8 +16,11 @@ from docling.datamodel.base_models import (
     LayoutPrediction,
     Page,
 )
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
 from docling.utils import layout_utils as lu
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@@ -271,74 +274,97 @@ class LayoutModel(BasePageModel):
return clusters_out_new, cells_out_new
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for page in page_batch:
assert page._backend is not None
if not page._backend.is_valid():
yield page
else:
assert page.size is not None
with TimeRecorder(conv_res, "layout"):
assert page.size is not None
clusters = []
for ix, pred_item in enumerate(
self.layout_predictor.predict(page.get_image(scale=1.0))
):
label = DocItemLabel(
pred_item["label"].lower().replace(" ", "_").replace("-", "_")
) # Temporary, until docling-ibm-model uses docling-core types
cluster = Cluster(
id=ix,
label=label,
confidence=pred_item["confidence"],
bbox=BoundingBox.model_validate(pred_item),
cells=[],
)
clusters.append(cluster)
# Map cells to clusters
# TODO: Remove, postprocess should take care of it anyway.
for cell in page.cells:
for cluster in clusters:
if not cell.bbox.area() > 0:
overlap_frac = 0.0
else:
overlap_frac = (
cell.bbox.intersection_area_with(cluster.bbox)
/ cell.bbox.area()
)
if overlap_frac > 0.5:
cluster.cells.append(cell)
# Pre-sort clusters
# clusters = self.sort_clusters_by_cell_order(clusters)
# DEBUG code:
def draw_clusters_and_cells():
image = copy.deepcopy(page.image)
draw = ImageDraw.Draw(image)
for c in clusters:
x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
cell_color = (
random.randint(30, 140),
random.randint(30, 140),
random.randint(30, 140),
clusters = []
for ix, pred_item in enumerate(
self.layout_predictor.predict(page.get_image(scale=1.0))
):
label = DocItemLabel(
pred_item["label"]
.lower()
.replace(" ", "_")
.replace("-", "_")
) # Temporary, until docling-ibm-model uses docling-core types
cluster = Cluster(
id=ix,
label=label,
confidence=pred_item["confidence"],
bbox=BoundingBox.model_validate(pred_item),
cells=[],
)
for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
image.show()
clusters.append(cluster)
# draw_clusters_and_cells()
# Map cells to clusters
# TODO: Remove, postprocess should take care of it anyway.
for cell in page.cells:
for cluster in clusters:
if not cell.bbox.area() > 0:
overlap_frac = 0.0
else:
overlap_frac = (
cell.bbox.intersection_area_with(cluster.bbox)
/ cell.bbox.area()
)
clusters, page.cells = self.postprocess(
clusters, page.cells, page.size.height
)
if overlap_frac > 0.5:
cluster.cells.append(cell)
# draw_clusters_and_cells()
# Pre-sort clusters
# clusters = self.sort_clusters_by_cell_order(clusters)
page.predictions.layout = LayoutPrediction(clusters=clusters)
# DEBUG code:
def draw_clusters_and_cells(show: bool = False):
image = copy.deepcopy(page.image)
if image is not None:
draw = ImageDraw.Draw(image)
for c in clusters:
x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
cell_color = (
random.randint(30, 140),
random.randint(30, 140),
random.randint(30, 140),
)
for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle(
[(x0, y0), (x1, y1)], outline=cell_color
)
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = (
out_path / f"layout_page_{page.page_no:05}.png"
)
image.save(str(out_file), format="png")
# draw_clusters_and_cells()
clusters, page.cells = self.postprocess(
clusters, page.cells, page.size.height
)
page.predictions.layout = LayoutPrediction(clusters=clusters)
if settings.debug.visualize_layout:
draw_clusters_and_cells()
yield page

View File

@@ -12,8 +12,10 @@ from docling.datamodel.base_models import (
     Table,
     TextElement,
 )
+from docling.datamodel.document import ConversionResult
 from docling.models.base_model import BasePageModel
 from docling.models.layout_model import LayoutModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@@ -51,122 +53,122 @@ class PageAssembleModel(BasePageModel):
return sanitized_text.strip() # Strip any leading or trailing whitespace
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for page in page_batch:
assert page._backend is not None
if not page._backend.is_valid():
yield page
else:
assert page.predictions.layout is not None
with TimeRecorder(conv_res, "page_assemble"):
# assembles some JSON output page by page.
assert page.predictions.layout is not None
elements: List[PageElement] = []
headers: List[PageElement] = []
body: List[PageElement] = []
# assembles some JSON output page by page.
for cluster in page.predictions.layout.clusters:
# _log.info("Cluster label seen:", cluster.label)
if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
elements: List[PageElement] = []
headers: List[PageElement] = []
body: List[PageElement] = []
textlines = [
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
text = self.sanitize_text(textlines)
text_el = TextElement(
label=cluster.label,
id=cluster.id,
text=text,
page_no=page.page_no,
cluster=cluster,
)
elements.append(text_el)
for cluster in page.predictions.layout.clusters:
# _log.info("Cluster label seen:", cluster.label)
if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
headers.append(text_el)
else:
body.append(text_el)
elif cluster.label == LayoutModel.TABLE_LABEL:
tbl = None
if page.predictions.tablestructure:
tbl = page.predictions.tablestructure.table_map.get(
cluster.id, None
)
if (
not tbl
): # fallback: add table without structure, if it isn't present
tbl = Table(
textlines = [
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
text = self.sanitize_text(textlines)
text_el = TextElement(
label=cluster.label,
id=cluster.id,
text="",
otsl_seq=[],
table_cells=[],
cluster=cluster,
page_no=page.page_no,
)
elements.append(tbl)
body.append(tbl)
elif cluster.label == LayoutModel.FIGURE_LABEL:
fig = None
if page.predictions.figures_classification:
fig = (
page.predictions.figures_classification.figure_map.get(
cluster.id, None
)
)
if (
not fig
): # fallback: add figure without classification, if it isn't present
fig = FigureElement(
label=cluster.label,
id=cluster.id,
text="",
data=None,
cluster=cluster,
page_no=page.page_no,
)
elements.append(fig)
body.append(fig)
elif cluster.label == LayoutModel.FORMULA_LABEL:
equation = None
if page.predictions.equations_prediction:
equation = (
page.predictions.equations_prediction.equation_map.get(
cluster.id, None
)
)
if (
not equation
): # fallback: add empty formula, if it isn't present
text = self.sanitize_text(
[
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
)
equation = TextElement(
label=cluster.label,
id=cluster.id,
cluster=cluster,
page_no=page.page_no,
text=text,
page_no=page.page_no,
cluster=cluster,
)
elements.append(equation)
body.append(equation)
elements.append(text_el)
page.assembled = AssembledUnit(
elements=elements, headers=headers, body=body
)
if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
headers.append(text_el)
else:
body.append(text_el)
elif cluster.label == LayoutModel.TABLE_LABEL:
tbl = None
if page.predictions.tablestructure:
tbl = page.predictions.tablestructure.table_map.get(
cluster.id, None
)
if (
not tbl
): # fallback: add table without structure, if it isn't present
tbl = Table(
label=cluster.label,
id=cluster.id,
text="",
otsl_seq=[],
table_cells=[],
cluster=cluster,
page_no=page.page_no,
)
# Remove page images (can be disabled)
if not self.options.keep_images:
page._image_cache = {}
elements.append(tbl)
body.append(tbl)
elif cluster.label == LayoutModel.FIGURE_LABEL:
fig = None
if page.predictions.figures_classification:
fig = page.predictions.figures_classification.figure_map.get(
cluster.id, None
)
if (
not fig
): # fallback: add figure without classification, if it isn't present
fig = FigureElement(
label=cluster.label,
id=cluster.id,
text="",
data=None,
cluster=cluster,
page_no=page.page_no,
)
elements.append(fig)
body.append(fig)
elif cluster.label == LayoutModel.FORMULA_LABEL:
equation = None
if page.predictions.equations_prediction:
equation = page.predictions.equations_prediction.equation_map.get(
cluster.id, None
)
if (
not equation
): # fallback: add empty formula, if it isn't present
text = self.sanitize_text(
[
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
)
equation = TextElement(
label=cluster.label,
id=cluster.id,
cluster=cluster,
page_no=page.page_no,
text=text,
)
elements.append(equation)
body.append(equation)
# Unload backend
page._backend.unload()
page.assembled = AssembledUnit(
elements=elements, headers=headers, body=body
)
# Remove page images (can be disabled)
if not self.options.keep_images:
page._image_cache = {}
# Unload backend
page._backend.unload()
yield page

View File

@@ -1,10 +1,14 @@
+from pathlib import Path
 from typing import Iterable, Optional

 from PIL import ImageDraw
 from pydantic import BaseModel

 from docling.datamodel.base_models import Page
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.profiling import TimeRecorder


 class PagePreprocessingOptions(BaseModel):
@@ -15,14 +19,17 @@ class PagePreprocessingModel(BasePageModel):
     def __init__(self, options: PagePreprocessingOptions):
         self.options = options

-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
+        self, conv_res: ConversionResult, page_batch: Iterable[Page]
+    ) -> Iterable[Page]:
         for page in page_batch:
             assert page._backend is not None
             if not page._backend.is_valid():
                 yield page
             else:
-                page = self._populate_page_images(page)
-                page = self._parse_page_cells(page)
+                with TimeRecorder(conv_res, "page_parse"):
+                    page = self._populate_page_images(page)
+                    page = self._parse_page_cells(conv_res, page)
                 yield page

     # Generate the page image and store it in the page object
@@ -43,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
         return page

     # Extract and populate the page cells and store it in the page object
-    def _parse_page_cells(self, page: Page) -> Page:
+    def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
         assert page._backend is not None
         page.cells = list(page._backend.get_text_cells())

         # DEBUG code:
-        def draw_text_boxes(image, cells):
+        def draw_text_boxes(image, cells, show: bool = False):
             draw = ImageDraw.Draw(image)
             for c in cells:
                 x0, y0, x1, y1 = c.bbox.as_tuple()
                 draw.rectangle([(x0, y0), (x1, y1)], outline="red")
-            image.show()
+            if show:
+                image.show()
+            else:
+                out_path: Path = (
+                    Path(settings.debug.debug_output_path)
+                    / f"debug_{conv_res.input.file.stem}"
+                )
+                out_path.mkdir(parents=True, exist_ok=True)
+                out_file = out_path / f"cells_page_{page.page_no:05}.png"
+                image.save(str(out_file), format="png")

-        # draw_text_boxes(page.get_image(scale=1.0), cells)
+        if settings.debug.visualize_cells:
+            draw_text_boxes(page.get_image(scale=1.0), page.cells)

         return page

View File

@@ -1,6 +1,6 @@
 import copy
+from pathlib import Path
-from typing import Iterable, List
+from typing import Iterable

 import numpy
 from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
@@ -8,8 +8,11 @@ from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredic
 from PIL import ImageDraw

 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.profiling import TimeRecorder


 class TableStructureModel(BasePageModel):
@@ -35,7 +38,13 @@ class TableStructureModel(BasePageModel):
self.tf_predictor = TFPredictor(self.tm_config)
self.scale = 2.0 # Scale up table input images to 144 dpi
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
def draw_table_and_cells(
self,
conv_res: ConversionResult,
page: Page,
tbl_list: Iterable[Table],
show: bool = False,
):
assert page._backend is not None
image = (
@@ -61,9 +70,21 @@ class TableStructureModel(BasePageModel):
fill="black",
)
image.show()
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
image.save(str(out_file), format="png")
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
@@ -74,98 +95,112 @@ class TableStructureModel(BasePageModel):
if not page._backend.is_valid():
yield page
else:
with TimeRecorder(conv_res, "table_structure"):
assert page.predictions.layout is not None
assert page.size is not None
assert page.predictions.layout is not None
assert page.size is not None
page.predictions.tablestructure = TableStructurePrediction() # dummy
page.predictions.tablestructure = (
TableStructurePrediction()
) # dummy
in_tables = [
(
cluster,
[
round(cluster.bbox.l) * self.scale,
round(cluster.bbox.t) * self.scale,
round(cluster.bbox.r) * self.scale,
round(cluster.bbox.b) * self.scale,
],
in_tables = [
(
cluster,
[
round(cluster.bbox.l) * self.scale,
round(cluster.bbox.t) * self.scale,
round(cluster.bbox.r) * self.scale,
round(cluster.bbox.b) * self.scale,
],
)
for cluster in page.predictions.layout.clusters
if cluster.label == DocItemLabel.TABLE
]
if not len(in_tables):
yield page
continue
tokens = []
for c in page.cells:
for cluster, _ in in_tables:
if c.bbox.area() > 0:
if (
c.bbox.intersection_area_with(cluster.bbox)
/ c.bbox.area()
> 0.2
):
# Only allow non empty stings (spaces) into the cells of a table
if len(c.text.strip()) > 0:
new_cell = copy.deepcopy(c)
new_cell.bbox = new_cell.bbox.scaled(
scale=self.scale
)
tokens.append(new_cell.model_dump())
page_input = {
"tokens": tokens,
"width": page.size.width * self.scale,
"height": page.size.height * self.scale,
}
page_input["image"] = numpy.asarray(
page.get_image(scale=self.scale)
)
for cluster in page.predictions.layout.clusters
if cluster.label == DocItemLabel.TABLE
]
if not len(in_tables):
yield page
continue
tokens = []
for c in page.cells:
for cluster, _ in in_tables:
if c.bbox.area() > 0:
if (
c.bbox.intersection_area_with(cluster.bbox)
/ c.bbox.area()
> 0.2
):
# Only allow non empty stings (spaces) into the cells of a table
if len(c.text.strip()) > 0:
new_cell = copy.deepcopy(c)
new_cell.bbox = new_cell.bbox.scaled(
scale=self.scale
table_clusters, table_bboxes = zip(*in_tables)
if len(table_bboxes):
tf_output = self.tf_predictor.multi_table_predict(
page_input, table_bboxes, do_matching=self.do_cell_matching
)
for table_cluster, table_out in zip(table_clusters, tf_output):
table_cells = []
for element in table_out["tf_responses"]:
if not self.do_cell_matching:
the_bbox = BoundingBox.model_validate(
element["bbox"]
).scaled(1 / self.scale)
text_piece = page._backend.get_text_in_rect(
the_bbox
)
element["bbox"]["token"] = text_piece
tokens.append(new_cell.model_dump())
tc = TableCell.model_validate(element)
if self.do_cell_matching and tc.bbox is not None:
tc.bbox = tc.bbox.scaled(1 / self.scale)
table_cells.append(tc)
page_input = {
"tokens": tokens,
"width": page.size.width * self.scale,
"height": page.size.height * self.scale,
}
page_input["image"] = numpy.asarray(page.get_image(scale=self.scale))
# Retrieving cols/rows, after post processing:
num_rows = table_out["predict_details"]["num_rows"]
num_cols = table_out["predict_details"]["num_cols"]
otsl_seq = table_out["predict_details"]["prediction"][
"rs_seq"
]
table_clusters, table_bboxes = zip(*in_tables)
tbl = Table(
otsl_seq=otsl_seq,
table_cells=table_cells,
num_rows=num_rows,
num_cols=num_cols,
id=table_cluster.id,
page_no=page.page_no,
cluster=table_cluster,
label=DocItemLabel.TABLE,
)
if len(table_bboxes):
tf_output = self.tf_predictor.multi_table_predict(
page_input, table_bboxes, do_matching=self.do_cell_matching
)
for table_cluster, table_out in zip(table_clusters, tf_output):
table_cells = []
for element in table_out["tf_responses"]:
if not self.do_cell_matching:
the_bbox = BoundingBox.model_validate(
element["bbox"]
).scaled(1 / self.scale)
text_piece = page._backend.get_text_in_rect(the_bbox)
element["bbox"]["token"] = text_piece
tc = TableCell.model_validate(element)
if self.do_cell_matching and tc.bbox is not None:
tc.bbox = tc.bbox.scaled(1 / self.scale)
table_cells.append(tc)
# Retrieving cols/rows, after post processing:
num_rows = table_out["predict_details"]["num_rows"]
num_cols = table_out["predict_details"]["num_cols"]
otsl_seq = table_out["predict_details"]["prediction"]["rs_seq"]
tbl = Table(
otsl_seq=otsl_seq,
table_cells=table_cells,
num_rows=num_rows,
num_cols=num_cols,
id=table_cluster.id,
page_no=page.page_no,
cluster=table_cluster,
label=DocItemLabel.TABLE,
)
page.predictions.tablestructure.table_map[table_cluster.id] = (
tbl
)
page.predictions.tablestructure.table_map[
table_cluster.id
] = tbl
# For debugging purposes:
# self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values())
if settings.debug.visualize_tables:
self.draw_table_and_cells(
conv_res,
page,
page.predictions.tablestructure.table_map.values(),
)
yield page

View File

@@ -8,8 +8,11 @@ import pandas as pd
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractCliOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@@ -102,7 +105,9 @@ class TesseractOcrCliModel(BaseOcrModel):
return df_filtered
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
@@ -113,62 +118,67 @@ class TesseractOcrCliModel(BaseOcrModel):
if not page._backend.is_valid():
yield page
else:
ocr_rects = self.get_ocr_rects(page)
with TimeRecorder(conv_res, "ocr"):
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
ocr_rects = self.get_ocr_rects(page)
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
df = self._run_tesseract(fname)
# _log.info(df)
# Print relevant columns (bounding box and text)
for ix, row in df.iterrows():
text = row["text"]
conf = row["conf"]
l = float(row["left"])
b = float(row["top"])
w = float(row["width"])
h = float(row["height"])
t = b + h
r = l + w
cell = OcrCell(
id=ix,
text=text,
confidence=conf / 100.0,
bbox=BoundingBox.from_tuple(
coord=(
(l / self.scale) + ocr_rect.l,
(b / self.scale) + ocr_rect.t,
(r / self.scale) + ocr_rect.l,
(t / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
all_ocr_cells.append(cell)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
)
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
df = self._run_tesseract(fname)
# _log.info(df)
# Print relevant columns (bounding box and text)
for ix, row in df.iterrows():
text = row["text"]
conf = row["conf"]
l = float(row["left"])
b = float(row["top"])
w = float(row["width"])
h = float(row["height"])
t = b + h
r = l + w
cell = OcrCell(
id=ix,
text=text,
confidence=conf / 100.0,
bbox=BoundingBox.from_tuple(
coord=(
(l / self.scale) + ocr_rect.l,
(b / self.scale) + ocr_rect.t,
(r / self.scale) + ocr_rect.l,
(t / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
all_ocr_cells.append(cell)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
page.cells.extend(filtered_ocr_cells)
page.cells.extend(filtered_ocr_cells)
# DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects)
if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page

View File

@@ -4,8 +4,11 @@ from typing import Iterable
 from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractOcrOptions
+from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@@ -61,7 +64,9 @@ class TesseractOcrModel(BaseOcrModel):
# Finalize the tesseractAPI
self.reader.End()
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled:
yield from page_batch
@@ -72,59 +77,66 @@ class TesseractOcrModel(BaseOcrModel):
if not page._backend.is_valid():
yield page
else:
assert self.reader is not None
with TimeRecorder(conv_res, "ocr"):
ocr_rects = self.get_ocr_rects(page)
assert self.reader is not None
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
ocr_rects = self.get_ocr_rects(page)
# Retrieve text snippets with their bounding boxes
self.reader.SetImage(high_res_image)
boxes = self.reader.GetComponentImages(
self.reader_RIL.TEXTLINE, True
)
cells = []
for ix, (im, box, _, _) in enumerate(boxes):
# Set the area of interest. Tesseract uses Bottom-Left for the origin
self.reader.SetRectangle(box["x"], box["y"], box["w"], box["h"])
# Extract text within the bounding box
text = self.reader.GetUTF8Text().strip()
confidence = self.reader.MeanTextConf()
left = box["x"] / self.scale
bottom = box["y"] / self.scale
right = (box["x"] + box["w"]) / self.scale
top = (box["y"] + box["h"]) / self.scale
cells.append(
OcrCell(
id=ix,
text=text,
confidence=confidence,
bbox=BoundingBox.from_tuple(
coord=(left, top, right, bottom),
origin=CoordOrigin.TOPLEFT,
),
)
all_ocr_cells = []
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
# del high_res_image
all_ocr_cells.extend(cells)
# Retrieve text snippets with their bounding boxes
self.reader.SetImage(high_res_image)
boxes = self.reader.GetComponentImages(
self.reader_RIL.TEXTLINE, True
)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
cells = []
for ix, (im, box, _, _) in enumerate(boxes):
# Set the area of interest. Tesseract uses Bottom-Left for the origin
self.reader.SetRectangle(
box["x"], box["y"], box["w"], box["h"]
)
page.cells.extend(filtered_ocr_cells)
# Extract text within the bounding box
text = self.reader.GetUTF8Text().strip()
confidence = self.reader.MeanTextConf()
left = box["x"] / self.scale
bottom = box["y"] / self.scale
right = (box["x"] + box["w"]) / self.scale
top = (box["y"] + box["h"]) / self.scale
cells.append(
OcrCell(
id=ix,
text=text,
confidence=confidence,
bbox=BoundingBox.from_tuple(
coord=(left, top, right, bottom),
origin=CoordOrigin.TOPLEFT,
),
)
)
# del high_res_image
all_ocr_cells.extend(cells)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
)
page.cells.extend(filtered_ocr_cells)
# DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects)
if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page

View File

@@ -19,6 +19,7 @@ from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BaseEnrichmentModel
+from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import chunkify

 _log = logging.getLogger(__name__)
@@ -35,13 +36,16 @@ class BasePipeline(ABC):
_log.info(f"Processing document {in_doc.file.name}")
try:
# These steps are building and assembling the structure of the
# output DoclingDocument
conv_res = self._build_document(in_doc, conv_res)
conv_res = self._assemble_document(in_doc, conv_res)
# From this stage, all operations should rely only on conv_res.output
conv_res = self._enrich_document(in_doc, conv_res)
conv_res.status = self._determine_status(in_doc, conv_res)
with TimeRecorder(
conv_res, "pipeline_total", scope=ProfilingScope.DOCUMENT
):
# These steps are building and assembling the structure of the
# output DoclingDocument
conv_res = self._build_document(conv_res)
conv_res = self._assemble_document(conv_res)
# From this stage, all operations should rely only on conv_res.output
conv_res = self._enrich_document(conv_res)
conv_res.status = self._determine_status(conv_res)
except Exception as e:
conv_res.status = ConversionStatus.FAILURE
if raises_on_error:
@@ -50,19 +54,13 @@ class BasePipeline(ABC):
return conv_res
@abstractmethod
def _build_document(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
pass
def _assemble_document(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
return conv_res
def _enrich_document(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult:
def _filter_elements(
doc: DoclingDocument, model: BaseEnrichmentModel
@@ -71,24 +69,23 @@ class BasePipeline(ABC):
if model.is_processable(doc=doc, element=element):
yield element
for model in self.enrichment_pipe:
for element_batch in chunkify(
_filter_elements(conv_res.document, model),
settings.perf.elements_batch_size,
):
# TODO: currently we assume the element itself is modified, because
# we don't have an interface to save the element back to the document
for element in model(
doc=conv_res.document, element_batch=element_batch
): # Must exhaust!
pass
with TimeRecorder(conv_res, "doc_enrich", scope=ProfilingScope.DOCUMENT):
for model in self.enrichment_pipe:
for element_batch in chunkify(
_filter_elements(conv_res.document, model),
settings.perf.elements_batch_size,
):
# TODO: currently we assume the element itself is modified, because
# we don't have an interface to save the element back to the document
for element in model(
doc=conv_res.document, element_batch=element_batch
): # Must exhaust!
pass
return conv_res
@abstractmethod
def _determine_status(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionStatus:
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
pass
@classmethod
@@ -110,66 +107,68 @@ class BasePipeline(ABC):
class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
def _apply_on_pages(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def _apply_on_pages(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for model in self.build_pipe:
page_batch = model(page_batch)
page_batch = model(conv_res, page_batch)
yield from page_batch
def _build_document(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
if not isinstance(in_doc._backend, PdfDocumentBackend):
if not isinstance(conv_res.input._backend, PdfDocumentBackend):
raise RuntimeError(
f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a PDF backend. "
f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. "
f"Can not convert this with a PDF pipeline. "
f"Please check your format configuration on DocumentConverter."
)
# conv_res.status = ConversionStatus.FAILURE
# return conv_res
for i in range(0, in_doc.page_count):
conv_res.pages.append(Page(page_no=i))
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
try:
# Iterate batches of pages (page_batch_size) in the doc
for page_batch in chunkify(conv_res.pages, settings.perf.page_batch_size):
start_pb_time = time.time()
for i in range(0, conv_res.input.page_count):
conv_res.pages.append(Page(page_no=i))
# 1. Initialise the page resources
init_pages = map(
functools.partial(self.initialize_page, in_doc), page_batch
try:
# Iterate batches of pages (page_batch_size) in the doc
for page_batch in chunkify(
conv_res.pages, settings.perf.page_batch_size
):
start_pb_time = time.time()
# 1. Initialise the page resources
init_pages = map(
functools.partial(self.initialize_page, conv_res), page_batch
)
# 2. Run pipeline stages
pipeline_pages = self._apply_on_pages(conv_res, init_pages)
for p in pipeline_pages: # Must exhaust!
pass
end_pb_time = time.time() - start_pb_time
_log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
except Exception as e:
conv_res.status = ConversionStatus.FAILURE
trace = "\n".join(traceback.format_exception(e))
_log.warning(
f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
f"{trace}"
)
raise e
# 2. Run pipeline stages
pipeline_pages = self._apply_on_pages(init_pages)
for p in pipeline_pages: # Must exhaust!
pass
end_pb_time = time.time() - start_pb_time
_log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
except Exception as e:
conv_res.status = ConversionStatus.FAILURE
trace = "\n".join(traceback.format_exception(e))
_log.warning(
f"Encountered an error during conversion of document {in_doc.document_hash}:\n"
f"{trace}"
)
raise e
finally:
# Always unload the PDF backend, even in case of failure
if in_doc._backend:
in_doc._backend.unload()
finally:
# Always unload the PDF backend, even in case of failure
if conv_res.input._backend:
conv_res.input._backend.unload()
return conv_res
def _determine_status(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionStatus:
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
status = ConversionStatus.SUCCESS
for page in conv_res.pages:
if page._backend is None or not page._backend.is_valid():
@@ -186,5 +185,5 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
# Initialise and load resources for a page
@abstractmethod
def initialize_page(self, doc: InputDocument, page: Page) -> Page:
def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
pass
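
Not part of the diff: taken together, the TimeRecorder calls across this commit introduce the following timing keys, collected here for reference and grouped by scope.

    # Timing keys introduced in this commit, grouped by ProfilingScope.
    PAGE_SCOPED = ["page_init", "page_parse", "ocr", "layout", "table_structure", "page_assemble"]
    DOCUMENT_SCOPED = ["doc_build", "doc_assemble", "doc_enrich", "glm", "pipeline_total"]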

View File

@@ -5,9 +5,10 @@ from docling.backend.abstract_backend import (
     DeclarativeDocumentBackend,
 )
 from docling.datamodel.base_models import ConversionStatus
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.pipeline.base_pipeline import BasePipeline
+from docling.utils.profiling import ProfilingScope, TimeRecorder

 _log = logging.getLogger(__name__)
@@ -22,13 +23,11 @@ class SimplePipeline(BasePipeline):
     def __init__(self, pipeline_options: PipelineOptions):
         super().__init__(pipeline_options)

-    def _build_document(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionResult:
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:

-        if not isinstance(in_doc._backend, DeclarativeDocumentBackend):
+        if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
             raise RuntimeError(
-                f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a declarative backend. "
+                f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
                 f"Can not convert this with simple pipeline. "
                 f"Please check your format configuration on DocumentConverter."
             )
@@ -38,13 +37,11 @@ class SimplePipeline(BasePipeline):
         # Instead of running a page-level pipeline to build up the document structure,
         # the backend is expected to be of type DeclarativeDocumentBackend, which can output
         # a DoclingDocument straight.
-        conv_res.document = in_doc._backend.convert()
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
+            conv_res.document = conv_res.input._backend.convert()

         return conv_res

-    def _determine_status(
-        self, in_doc: InputDocument, conv_res: ConversionResult
-    ) -> ConversionStatus:
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
         # This is called only if the previous steps didn't raise.
         # Since we don't have anything else to evaluate, we can
         # safely return SUCCESS.

View File

@@ -7,7 +7,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import AssembledUnit, Page
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
     EasyOcrOptions,
     PdfPipelineOptions,
@@ -27,6 +27,7 @@ from docling.models.table_structure_model import TableStructureModel
 from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
 from docling.models.tesseract_ocr_model import TesseractOcrModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
+from docling.utils.profiling import ProfilingScope, TimeRecorder

 _log = logging.getLogger(__name__)
@@ -119,73 +120,75 @@ class StandardPdfPipeline(PaginatedPipeline):
)
return None
def initialize_page(self, doc: InputDocument, page: Page) -> Page:
page._backend = doc._backend.load_page(page.page_no) # type: ignore
if page._backend is not None and page._backend.is_valid():
page.size = page._backend.get_size()
def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
with TimeRecorder(conv_res, "page_init"):
page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
if page._backend is not None and page._backend.is_valid():
page.size = page._backend.get_size()
return page
def _assemble_document(
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
all_elements = []
all_headers = []
all_body = []
for p in conv_res.pages:
if p.assembled is not None:
for el in p.assembled.body:
all_body.append(el)
for el in p.assembled.headers:
all_headers.append(el)
for el in p.assembled.elements:
all_elements.append(el)
with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
for p in conv_res.pages:
if p.assembled is not None:
for el in p.assembled.body:
all_body.append(el)
for el in p.assembled.headers:
all_headers.append(el)
for el in p.assembled.elements:
all_elements.append(el)
conv_res.assembled = AssembledUnit(
elements=all_elements, headers=all_headers, body=all_body
)
conv_res.assembled = AssembledUnit(
elements=all_elements, headers=all_headers, body=all_body
)
conv_res.document = self.glm_model(conv_res)
conv_res.document = self.glm_model(conv_res)
# Generate page images in the output
if self.pipeline_options.generate_page_images:
for page in conv_res.pages:
assert page.image is not None
page_no = page.page_no + 1
conv_res.document.pages[page_no].image = ImageRef.from_pil(
page.image, dpi=int(72 * self.pipeline_options.images_scale)
)
# Generate images of the requested element types
if (
self.pipeline_options.generate_picture_images
or self.pipeline_options.generate_table_images
):
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if not isinstance(element, DocItem) or len(element.prov) == 0:
continue
if (
isinstance(element, PictureItem)
and self.pipeline_options.generate_picture_images
) or (
isinstance(element, TableItem)
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
assert page.size is not None
# Generate page images in the output
if self.pipeline_options.generate_page_images:
for page in conv_res.pages:
assert page.image is not None
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(page_height=page.size.height * scale)
page_no = page.page_no + 1
conv_res.document.pages[page_no].image = ImageRef.from_pil(
page.image, dpi=int(72 * self.pipeline_options.images_scale)
)
cropped_im = page.image.crop(crop_bbox.as_tuple())
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
# Generate images of the requested element types
if (
self.pipeline_options.generate_picture_images
or self.pipeline_options.generate_table_images
):
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if not isinstance(element, DocItem) or len(element.prov) == 0:
continue
if (
isinstance(element, PictureItem)
and self.pipeline_options.generate_picture_images
) or (
isinstance(element, TableItem)
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
assert page.size is not None
assert page.image is not None
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(page_height=page.size.height * scale)
)
cropped_im = page.image.crop(crop_bbox.as_tuple())
element.image = ImageRef.from_pil(
cropped_im, dpi=int(72 * scale)
)
return conv_res
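
Not part of the diff: the image-generation branches above are driven by options on PdfPipelineOptions (imported earlier in this file). A configuration sketch; the 2.0 scale is an illustrative value.

    from docling.datamodel.pipeline_options import PdfPipelineOptions

    opts = PdfPipelineOptions()
    opts.images_scale = 2.0              # rendered at 72 * images_scale dpi
    opts.generate_page_images = True     # attach a full image per page
    opts.generate_picture_images = True  # crop out each PictureItem
    opts.generate_table_images = True    # crop out each TableItem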

View File

@@ -0,0 +1,62 @@
+import time
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, List
+
+import numpy as np
+from pydantic import BaseModel
+
+from docling.datamodel.settings import settings
+
+if TYPE_CHECKING:
+    from docling.datamodel.document import ConversionResult
+
+
+class ProfilingScope(str, Enum):
+    PAGE = "page"
+    DOCUMENT = "document"
+
+
+class ProfilingItem(BaseModel):
+    scope: ProfilingScope
+    count: int = 0
+    times: List[float] = []
+    start_timestamps: List[datetime] = []
+
+    def avg(self) -> float:
+        return np.average(self.times)  # type: ignore
+
+    def std(self) -> float:
+        return np.std(self.times)  # type: ignore
+
+    def mean(self) -> float:
+        return np.mean(self.times)  # type: ignore
+
+    def percentile(self, perc: float) -> float:
+        return np.percentile(self.times, perc)  # type: ignore
+
+
+class TimeRecorder:
+    def __init__(
+        self,
+        conv_res: "ConversionResult",
+        key: str,
+        scope: ProfilingScope = ProfilingScope.PAGE,
+    ):
+        if settings.debug.profile_pipeline_timings:
+            if key not in conv_res.timings.keys():
+                conv_res.timings[key] = ProfilingItem(scope=scope)
+            self.conv_res = conv_res
+            self.key = key
+
+    def __enter__(self):
+        if settings.debug.profile_pipeline_timings:
+            self.start = time.monotonic()
+            self.conv_res.timings[self.key].start_timestamps.append(datetime.utcnow())
+        return self
+
+    def __exit__(self, *args):
+        if settings.debug.profile_pipeline_timings:
+            elapsed = time.monotonic() - self.start
+            self.conv_res.timings[self.key].times.append(elapsed)
+            self.conv_res.timings[self.key].count += 1
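
Not part of the diff: the intended usage pattern, mirroring the stage code earlier in this commit; conv_res is the ConversionResult threaded through each stage, and the recorder is a no-op unless profiling is enabled in the debug settings.

    from docling.utils.profiling import ProfilingScope, TimeRecorder

    def run_stage(conv_res):
        # Page scope (the default): one sample appended per page processed.
        with TimeRecorder(conv_res, "ocr"):
            pass  # per-page work goes here
        # Document scope: a single sample for the whole document.
        with TimeRecorder(conv_res, "pipeline_total", scope=ProfilingScope.DOCUMENT):
            pass  # whole-document work goes here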