Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-31 14:34:40 +00:00 · 2024-10-28 15:04:09 +01:00 · 2024-10-28 15:04:09 +01:00 · 0814f32ae4
commit 0814f32ae4
parent a00f01cf07
15 changed files with 644 additions and 527 deletions
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@ -6,6 +6,7 @@ from pathlib import Path, PurePath
 from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type, Union
 import filetype
 import numpy as np
 from docling_core.types.doc import (
    DocItem,
    DocItemLabel,
@ -179,6 +180,29 @@ class DocumentFormat(str, Enum):
    V1 = "v1"
 class ProfilingScope(str, Enum):
    """Label stored in ``ProfilingItem.scope`` to tag a set of timing samples.
    Members are also plain strings because the enum mixes in ``str``.
    """
    # Presumably marks timings taken once per page -- confirm with callers.
    PAGE = "page"
    # Presumably marks timings taken once per document -- confirm with callers.
    DOCUMENT = "document"
 class ProfilingItem(BaseModel):
    """Collected timing samples for one profiling key, with numpy-backed
    summary statistics over the ``times`` list.
    """
    # Scope tag for these samples.
    scope: ProfilingScope
    # Sample counter kept next to ``times``; starts at 0.
    count: int = 0
    # Recorded durations; starts empty.
    times: List[float] = []
    def avg(self) -> float:
        """Return ``np.average`` of the recorded times."""
        samples = self.times
        return np.average(samples)  # type: ignore
    def std(self) -> float:
        """Return ``np.std`` (standard deviation) of the recorded times."""
        samples = self.times
        return np.std(samples)  # type: ignore
    def mean(self) -> float:
        """Return ``np.mean`` of the recorded times."""
        samples = self.times
        return np.mean(samples)  # type: ignore
    def percentile(self, perc: float) -> float:
        """Return ``np.percentile`` of the recorded times.

        ``perc`` is forwarded unchanged as numpy's percentile argument.
        """
        samples = self.times
        return np.percentile(samples, perc)  # type: ignore
 class ConversionResult(BaseModel):
    input: InputDocument
@ -187,6 +211,7 @@ class ConversionResult(BaseModel):
    pages: List[Page] = []
    assembled: AssembledUnit = AssembledUnit()
    timings: Dict[str, ProfilingItem] = {}
    document: DoclingDocument = _EMPTY_DOCLING_DOC
--- a/docling/datamodel/settings.py
+++ b/docling/datamodel/settings.py
@ -32,6 +32,8 @@ class DebugSettings(BaseModel):
    visualize_layout: bool = False
    visualize_tables: bool = False
    profile_pipeline_timings: bool = False
 class AppSettings(BaseSettings):
    perf: BatchConcurrencySettings
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@ -1,14 +1,19 @@
 import time
 from abc import ABC, abstractmethod
-from typing import Any, Iterable
+from typing import Any, Callable, Iterable, Type
 from docling_core.types.doc import DoclingDocument, NodeItem
 from docling.datamodel.base_models import Page
 from docling.datamodel.document import ConversionResult, ProfilingItem, ProfilingScope
 from docling.datamodel.settings import settings
 class BasePageModel(ABC):
    @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        pass
@ -23,3 +28,28 @@ class BaseEnrichmentModel(ABC):
        self, doc: DoclingDocument, element_batch: Iterable[NodeItem]
    ) -> Iterable[Any]:
        pass
 class TimeRecorder:
    """Context manager that times its ``with`` body and stores the result in
    ``conv_res.timings[key]``.

    Recording only happens when ``settings.debug.profile_pipeline_timings`` is
    truthy. The flag is read once, at construction, so ``__enter__`` and
    ``__exit__`` always agree with ``__init__`` even if the setting is changed
    while the block is running.

    Args:
        conv_res: Conversion result whose ``timings`` dict receives the sample.
        key: Name of the ``timings`` entry to append to.
        scope: Scope given to the ``ProfilingItem`` when the entry is created;
            ignored if an entry for ``key`` already exists.
    """
    def __init__(
        self,
        conv_res: ConversionResult,
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        # Snapshot the flag: re-reading the global in each method could leave
        # __exit__ touching attributes that were never assigned.
        self._enabled = bool(settings.debug.profile_pipeline_timings)
        self.conv_res = conv_res
        self.key = key
        self.start = 0.0
        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)
    def __enter__(self):
        """Start the clock (when profiling is enabled) and return ``self``."""
        if self._enabled:
            self.start = time.monotonic()
        return self
    def __exit__(self, *args):
        """Append the elapsed time and bump the sample count.

        Returns ``None``, so an exception raised inside the ``with`` body is
        not suppressed; the sample is still recorded in that case.
        """
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1
--- a/docling/models/base_ocr_model.py
+++ b/docling/models/base_ocr_model.py
@ -10,12 +10,14 @@ from rtree import index
 from scipy.ndimage import find_objects, label
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import OcrOptions
 from docling.models.base_model import BasePageModel
 _log = logging.getLogger(__name__)
-class BaseOcrModel:
+class BaseOcrModel(BasePageModel):
    def __init__(self, enabled: bool, options: OcrOptions):
        self.enabled = enabled
        self.options = options
@ -133,5 +135,7 @@ class BaseOcrModel:
        image.show()
    @abstractmethod
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        pass
--- a/docling/models/ds_glm_model.py
+++ b/docling/models/ds_glm_model.py
@ -27,6 +27,7 @@ from pydantic import BaseModel, ConfigDict
 from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
 from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
 from docling.models.base_model import TimeRecorder
 from docling.utils.utils import create_hash
@ -226,12 +227,13 @@ class GlmModel:
        return ds_doc
    def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
-        ds_doc = self._to_legacy_document(conv_res)
+        with TimeRecorder(conv_res, "glm"):
-        ds_doc_dict = ds_doc.model_dump(by_alias=True)
+            ds_doc = self._to_legacy_document(conv_res)
            ds_doc_dict = ds_doc.model_dump(by_alias=True)
-        glm_doc = self.model.apply_on_doc(ds_doc_dict)
+            glm_doc = self.model.apply_on_doc(ds_doc_dict)
-        docling_doc: DoclingDocument = to_docling_document(glm_doc)  # Experimental
+            docling_doc: DoclingDocument = to_docling_document(glm_doc)  # Experimental
        # DEBUG code:
        def draw_clusters_and_cells(ds_document, page_no):
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@ -1,12 +1,15 @@
 import logging
 import time
 from typing import Iterable
 import numpy
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult, ProfilingItem
 from docling.datamodel.pipeline_options import EasyOcrOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import TimeRecorder
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@ -34,56 +37,62 @@ class EasyOcrModel(BaseOcrModel):
                download_enabled=self.options.download_enabled,
            )
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        if not self.enabled:
            yield from page_batch
            return
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                ocr_rects = self.get_ocr_rects(page)
+                with TimeRecorder(conv_res, "ocr"):
                    ocr_rects = self.get_ocr_rects(page)
-                all_ocr_cells = []
+                    all_ocr_cells = []
-                for ocr_rect in ocr_rects:
+                    for ocr_rect in ocr_rects:
-                    # Skip zero area boxes
+                        # Skip zero area boxes
-                    if ocr_rect.area() == 0:
+                        if ocr_rect.area() == 0:
-                        continue
+                            continue
-                    high_res_image = page._backend.get_page_image(
+                        high_res_image = page._backend.get_page_image(
-                        scale=self.scale, cropbox=ocr_rect
+                            scale=self.scale, cropbox=ocr_rect
                    )
                    im = numpy.array(high_res_image)
                    result = self.reader.readtext(im)
                    del high_res_image
                    del im
                    cells = [
                        OcrCell(
                            id=ix,
                            text=line[1],
                            confidence=line[2],
                            bbox=BoundingBox.from_tuple(
                                coord=(
                                    (line[0][0][0] / self.scale) + ocr_rect.l,
                                    (line[0][0][1] / self.scale) + ocr_rect.t,
                                    (line[0][2][0] / self.scale) + ocr_rect.l,
                                    (line[0][2][1] / self.scale) + ocr_rect.t,
                                ),
                                origin=CoordOrigin.TOPLEFT,
                            ),
                        )
-                        for ix, line in enumerate(result)
+                        im = numpy.array(high_res_image)
-                    ]
+                        result = self.reader.readtext(im)
                    all_ocr_cells.extend(cells)
-                ## Remove OCR cells which overlap with programmatic cells.
+                        del high_res_image
-                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
+                        del im
-                page.cells.extend(filtered_ocr_cells)
+                        cells = [
                            OcrCell(
                                id=ix,
                                text=line[1],
                                confidence=line[2],
                                bbox=BoundingBox.from_tuple(
                                    coord=(
                                        (line[0][0][0] / self.scale) + ocr_rect.l,
                                        (line[0][0][1] / self.scale) + ocr_rect.t,
                                        (line[0][2][0] / self.scale) + ocr_rect.l,
                                        (line[0][2][1] / self.scale) + ocr_rect.t,
                                    ),
                                    origin=CoordOrigin.TOPLEFT,
                                ),
                            )
                            for ix, line in enumerate(result)
                        ]
                        all_ocr_cells.extend(cells)
                    ## Remove OCR cells which overlap with programmatic cells.
                    filtered_ocr_cells = self.filter_ocr_cells(
                        all_ocr_cells, page.cells
                    )
                    page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
                if settings.debug.visualize_ocr:
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@ -16,8 +16,9 @@ from docling.datamodel.base_models import (
    LayoutPrediction,
    Page,
 )
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.settings import settings
-from docling.models.base_model import BasePageModel
+from docling.models.base_model import BasePageModel, TimeRecorder
 from docling.utils import layout_utils as lu
 _log = logging.getLogger(__name__)
@ -272,77 +273,86 @@ class LayoutModel(BasePageModel):
        return clusters_out_new, cells_out_new
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                assert page.size is not None
+                with TimeRecorder(conv_res, "layout"):
                    assert page.size is not None
-                clusters = []
+                    clusters = []
-                for ix, pred_item in enumerate(
+                    for ix, pred_item in enumerate(
-                    self.layout_predictor.predict(page.get_image(scale=1.0))
+                        self.layout_predictor.predict(page.get_image(scale=1.0))
-                ):
+                    ):
-                    label = DocItemLabel(
+                        label = DocItemLabel(
-                        pred_item["label"].lower().replace(" ", "_").replace("-", "_")
+                            pred_item["label"]
-                    )  # Temporary, until docling-ibm-model uses docling-core types
+                            .lower()
-                    cluster = Cluster(
+                            .replace(" ", "_")
-                        id=ix,
+                            .replace("-", "_")
-                        label=label,
+                        )  # Temporary, until docling-ibm-model uses docling-core types
-                        confidence=pred_item["confidence"],
+                        cluster = Cluster(
-                        bbox=BoundingBox.model_validate(pred_item),
+                            id=ix,
-                        cells=[],
+                            label=label,
                            confidence=pred_item["confidence"],
                            bbox=BoundingBox.model_validate(pred_item),
                            cells=[],
                        )
                        clusters.append(cluster)
                    # Map cells to clusters
                    # TODO: Remove, postprocess should take care of it anyway.
                    for cell in page.cells:
                        for cluster in clusters:
                            if not cell.bbox.area() > 0:
                                overlap_frac = 0.0
                            else:
                                overlap_frac = (
                                    cell.bbox.intersection_area_with(cluster.bbox)
                                    / cell.bbox.area()
                                )
                            if overlap_frac > 0.5:
                                cluster.cells.append(cell)
                    # Pre-sort clusters
                    # clusters = self.sort_clusters_by_cell_order(clusters)
                    # DEBUG code:
                    def draw_clusters_and_cells(show: bool = True):
                        image = copy.deepcopy(page.image)
                        if image is not None:
                            draw = ImageDraw.Draw(image)
                            for c in clusters:
                                x0, y0, x1, y1 = c.bbox.as_tuple()
                                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
                                cell_color = (
                                    random.randint(30, 140),
                                    random.randint(30, 140),
                                    random.randint(30, 140),
                                )
                                for tc in c.cells:  # [:1]:
                                    x0, y0, x1, y1 = tc.bbox.as_tuple()
                                    draw.rectangle(
                                        [(x0, y0), (x1, y1)], outline=cell_color
                                    )
                            if show:
                                image.show()
                    # draw_clusters_and_cells()
                    clusters, page.cells = self.postprocess(
                        clusters, page.cells, page.size.height
                    )
                    clusters.append(cluster)
-                # Map cells to clusters
+                    page.predictions.layout = LayoutPrediction(clusters=clusters)
                # TODO: Remove, postprocess should take care of it anyway.
                for cell in page.cells:
                    for cluster in clusters:
                        if not cell.bbox.area() > 0:
                            overlap_frac = 0.0
                        else:
                            overlap_frac = (
                                cell.bbox.intersection_area_with(cluster.bbox)
                                / cell.bbox.area()
                            )
                        if overlap_frac > 0.5:
                            cluster.cells.append(cell)
                # Pre-sort clusters
                # clusters = self.sort_clusters_by_cell_order(clusters)
                # DEBUG code:
                def draw_clusters_and_cells(show: bool = True):
                    image = copy.deepcopy(page.image)
                    if image is not None:
                        draw = ImageDraw.Draw(image)
                        for c in clusters:
                            x0, y0, x1, y1 = c.bbox.as_tuple()
                            draw.rectangle([(x0, y0), (x1, y1)], outline="green")
                            cell_color = (
                                random.randint(30, 140),
                                random.randint(30, 140),
                                random.randint(30, 140),
                            )
                            for tc in c.cells:  # [:1]:
                                x0, y0, x1, y1 = tc.bbox.as_tuple()
                                draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
                        if show:
                            image.show()
                # draw_clusters_and_cells()
                clusters, page.cells = self.postprocess(
                    clusters, page.cells, page.size.height
                )
                if settings.debug.visualize_layout:
                    draw_clusters_and_cells()
                page.predictions.layout = LayoutPrediction(clusters=clusters)
                yield page
--- a/docling/models/page_assemble_model.py
+++ b/docling/models/page_assemble_model.py
@ -12,7 +12,8 @@ from docling.datamodel.base_models import (
    Table,
    TextElement,
 )
-from docling.models.base_model import BasePageModel
+from docling.datamodel.document import ConversionResult
 from docling.models.base_model import BasePageModel, TimeRecorder
 from docling.models.layout_model import LayoutModel
 _log = logging.getLogger(__name__)
@ -51,122 +52,122 @@ class PageAssembleModel(BasePageModel):
        return sanitized_text.strip()  # Strip any leading or trailing whitespace
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                assert page.predictions.layout is not None
+                with TimeRecorder(conv_res, "page_assemble"):
-                # assembles some JSON output page by page.
+                    assert page.predictions.layout is not None
-                elements: List[PageElement] = []
+                    # assembles some JSON output page by page.
                headers: List[PageElement] = []
                body: List[PageElement] = []
-                for cluster in page.predictions.layout.clusters:
+                    elements: List[PageElement] = []
-                    # _log.info("Cluster label seen:", cluster.label)
+                    headers: List[PageElement] = []
-                    if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
+                    body: List[PageElement] = []
-                        textlines = [
+                    for cluster in page.predictions.layout.clusters:
-                            cell.text.replace("\x02", "-").strip()
+                        # _log.info("Cluster label seen:", cluster.label)
-                            for cell in cluster.cells
+                        if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
                            if len(cell.text.strip()) > 0
                        ]
                        text = self.sanitize_text(textlines)
                        text_el = TextElement(
                            label=cluster.label,
                            id=cluster.id,
                            text=text,
                            page_no=page.page_no,
                            cluster=cluster,
                        )
                        elements.append(text_el)
-                        if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
+                            textlines = [
-                            headers.append(text_el)
+                                cell.text.replace("\x02", "-").strip()
-                        else:
+                                for cell in cluster.cells
-                            body.append(text_el)
+                                if len(cell.text.strip()) > 0
-                    elif cluster.label == LayoutModel.TABLE_LABEL:
+                            ]
-                        tbl = None
+                            text = self.sanitize_text(textlines)
-                        if page.predictions.tablestructure:
+                            text_el = TextElement(
                            tbl = page.predictions.tablestructure.table_map.get(
                                cluster.id, None
                            )
                        if (
                            not tbl
                        ):  # fallback: add table without structure, if it isn't present
                            tbl = Table(
                                label=cluster.label,
                                id=cluster.id,
                                text="",
                                otsl_seq=[],
                                table_cells=[],
                                cluster=cluster,
                                page_no=page.page_no,
                            )
                        elements.append(tbl)
                        body.append(tbl)
                    elif cluster.label == LayoutModel.FIGURE_LABEL:
                        fig = None
                        if page.predictions.figures_classification:
                            fig = (
                                page.predictions.figures_classification.figure_map.get(
                                    cluster.id, None
                                )
                            )
                        if (
                            not fig
                        ):  # fallback: add figure without classification, if it isn't present
                            fig = FigureElement(
                                label=cluster.label,
                                id=cluster.id,
                                text="",
                                data=None,
                                cluster=cluster,
                                page_no=page.page_no,
                            )
                        elements.append(fig)
                        body.append(fig)
                    elif cluster.label == LayoutModel.FORMULA_LABEL:
                        equation = None
                        if page.predictions.equations_prediction:
                            equation = (
                                page.predictions.equations_prediction.equation_map.get(
                                    cluster.id, None
                                )
                            )
                        if (
                            not equation
                        ):  # fallback: add empty formula, if it isn't present
                            text = self.sanitize_text(
                                [
                                    cell.text.replace("\x02", "-").strip()
                                    for cell in cluster.cells
                                    if len(cell.text.strip()) > 0
                                ]
                            )
                            equation = TextElement(
                                label=cluster.label,
                                id=cluster.id,
                                cluster=cluster,
                                page_no=page.page_no,
                                text=text,
                                page_no=page.page_no,
                                cluster=cluster,
                            )
-                        elements.append(equation)
+                            elements.append(text_el)
                        body.append(equation)
-                page.assembled = AssembledUnit(
+                            if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
-                    elements=elements, headers=headers, body=body
+                                headers.append(text_el)
-                )
+                            else:
                                body.append(text_el)
                        elif cluster.label == LayoutModel.TABLE_LABEL:
                            tbl = None
                            if page.predictions.tablestructure:
                                tbl = page.predictions.tablestructure.table_map.get(
                                    cluster.id, None
                                )
                            if (
                                not tbl
                            ):  # fallback: add table without structure, if it isn't present
                                tbl = Table(
                                    label=cluster.label,
                                    id=cluster.id,
                                    text="",
                                    otsl_seq=[],
                                    table_cells=[],
                                    cluster=cluster,
                                    page_no=page.page_no,
                                )
-                # Remove page images (can be disabled)
+                            elements.append(tbl)
-                if not self.options.keep_images:
+                            body.append(tbl)
-                    page._image_cache = {}
+                        elif cluster.label == LayoutModel.FIGURE_LABEL:
                            fig = None
                            if page.predictions.figures_classification:
                                fig = page.predictions.figures_classification.figure_map.get(
                                    cluster.id, None
                                )
                            if (
                                not fig
                            ):  # fallback: add figure without classification, if it isn't present
                                fig = FigureElement(
                                    label=cluster.label,
                                    id=cluster.id,
                                    text="",
                                    data=None,
                                    cluster=cluster,
                                    page_no=page.page_no,
                                )
                            elements.append(fig)
                            body.append(fig)
                        elif cluster.label == LayoutModel.FORMULA_LABEL:
                            equation = None
                            if page.predictions.equations_prediction:
                                equation = page.predictions.equations_prediction.equation_map.get(
                                    cluster.id, None
                                )
                            if (
                                not equation
                            ):  # fallback: add empty formula, if it isn't present
                                text = self.sanitize_text(
                                    [
                                        cell.text.replace("\x02", "-").strip()
                                        for cell in cluster.cells
                                        if len(cell.text.strip()) > 0
                                    ]
                                )
                                equation = TextElement(
                                    label=cluster.label,
                                    id=cluster.id,
                                    cluster=cluster,
                                    page_no=page.page_no,
                                    text=text,
                                )
                            elements.append(equation)
                            body.append(equation)
-                # Unload backend
+                    page.assembled = AssembledUnit(
-                page._backend.unload()
+                        elements=elements, headers=headers, body=body
                    )
                    # Remove page images (can be disabled)
                    if not self.options.keep_images:
                        page._image_cache = {}
                    # Unload backend
                    page._backend.unload()
                yield page
--- a/docling/models/page_preprocessing_model.py
+++ b/docling/models/page_preprocessing_model.py
@ -4,7 +4,8 @@ from PIL import ImageDraw
 from pydantic import BaseModel
 from docling.datamodel.base_models import Page
-from docling.models.base_model import BasePageModel
+from docling.datamodel.document import ConversionResult
 from docling.models.base_model import BasePageModel, TimeRecorder
 class PagePreprocessingOptions(BaseModel):
@ -15,14 +16,17 @@ class PagePreprocessingModel(BasePageModel):
    def __init__(self, options: PagePreprocessingOptions):
        self.options = options
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        for page in page_batch:
            assert page._backend is not None
            if not page._backend.is_valid():
                yield page
            else:
-                page = self._populate_page_images(page)
+                with TimeRecorder(conv_res, "page_parse"):
-                page = self._parse_page_cells(page)
+                    page = self._populate_page_images(page)
                    page = self._parse_page_cells(page)
                yield page
    # Generate the page image and store it in the page object
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@ -8,9 +8,10 @@ from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredic
 from PIL import ImageDraw
 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
 from docling.datamodel.settings import settings
-from docling.models.base_model import BasePageModel
+from docling.models.base_model import BasePageModel, TimeRecorder
 class TableStructureModel(BasePageModel):
@ -64,7 +65,9 @@ class TableStructureModel(BasePageModel):
        image.show()
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        if not self.enabled:
            yield from page_batch
@ -75,96 +78,105 @@ class TableStructureModel(BasePageModel):
            if not page._backend.is_valid():
                yield page
            else:
                with TimeRecorder(conv_res, "table_structure"):
-                assert page.predictions.layout is not None
+                    assert page.predictions.layout is not None
-                assert page.size is not None
+                    assert page.size is not None
-                page.predictions.tablestructure = TableStructurePrediction()  # dummy
+                    page.predictions.tablestructure = (
                        TableStructurePrediction()
                    )  # dummy
-                in_tables = [
+                    in_tables = [
-                    (
+                        (
-                        cluster,
+                            cluster,
-                        [
+                            [
-                            round(cluster.bbox.l) * self.scale,
+                                round(cluster.bbox.l) * self.scale,
-                            round(cluster.bbox.t) * self.scale,
+                                round(cluster.bbox.t) * self.scale,
-                            round(cluster.bbox.r) * self.scale,
+                                round(cluster.bbox.r) * self.scale,
-                            round(cluster.bbox.b) * self.scale,
+                                round(cluster.bbox.b) * self.scale,
-                        ],
+                            ],
                        )
                        for cluster in page.predictions.layout.clusters
                        if cluster.label == DocItemLabel.TABLE
                    ]
                    if not len(in_tables):
                        yield page
                        continue
                    tokens = []
                    for c in page.cells:
                        for cluster, _ in in_tables:
                            if c.bbox.area() > 0:
                                if (
                                    c.bbox.intersection_area_with(cluster.bbox)
                                    / c.bbox.area()
                                    > 0.2
                                ):
                                    # Only allow non-empty strings (not just whitespace) into the cells of a table
                                    if len(c.text.strip()) > 0:
                                        new_cell = copy.deepcopy(c)
                                        new_cell.bbox = new_cell.bbox.scaled(
                                            scale=self.scale
                                        )
                                        tokens.append(new_cell.model_dump())
                    page_input = {
                        "tokens": tokens,
                        "width": page.size.width * self.scale,
                        "height": page.size.height * self.scale,
                    }
                    page_input["image"] = numpy.asarray(
                        page.get_image(scale=self.scale)
                    )
                    for cluster in page.predictions.layout.clusters
                    if cluster.label == DocItemLabel.TABLE
                ]
                if not len(in_tables):
                    yield page
                    continue
-                tokens = []
+                    table_clusters, table_bboxes = zip(*in_tables)
-                for c in page.cells:
+
-                    for cluster, _ in in_tables:
+                    if len(table_bboxes):
-                        if c.bbox.area() > 0:
+                        tf_output = self.tf_predictor.multi_table_predict(
-                            if (
+                            page_input, table_bboxes, do_matching=self.do_cell_matching
-                                c.bbox.intersection_area_with(cluster.bbox)
+                        )
-                                / c.bbox.area()
+
-                                > 0.2
+                        for table_cluster, table_out in zip(table_clusters, tf_output):
-                            ):
+                            table_cells = []
-                                # Only allow non empty stings (spaces) into the cells of a table
+                            for element in table_out["tf_responses"]:
-                                if len(c.text.strip()) > 0:
+
-                                    new_cell = copy.deepcopy(c)
+                                if not self.do_cell_matching:
-                                    new_cell.bbox = new_cell.bbox.scaled(
+                                    the_bbox = BoundingBox.model_validate(
-                                        scale=self.scale
+                                        element["bbox"]
                                    ).scaled(1 / self.scale)
                                    text_piece = page._backend.get_text_in_rect(
                                        the_bbox
                                    )
                                    element["bbox"]["token"] = text_piece
-                                    tokens.append(new_cell.model_dump())
+                                tc = TableCell.model_validate(element)
                                if self.do_cell_matching and tc.bbox is not None:
                                    tc.bbox = tc.bbox.scaled(1 / self.scale)
                                table_cells.append(tc)
-                page_input = {
+                            # Retrieving cols/rows, after post processing:
-                    "tokens": tokens,
+                            num_rows = table_out["predict_details"]["num_rows"]
-                    "width": page.size.width * self.scale,
+                            num_cols = table_out["predict_details"]["num_cols"]
-                    "height": page.size.height * self.scale,
+                            otsl_seq = table_out["predict_details"]["prediction"][
-                }
+                                "rs_seq"
-                page_input["image"] = numpy.asarray(page.get_image(scale=self.scale))
+                            ]
-                table_clusters, table_bboxes = zip(*in_tables)
+                            tbl = Table(
                                otsl_seq=otsl_seq,
                                table_cells=table_cells,
                                num_rows=num_rows,
                                num_cols=num_cols,
                                id=table_cluster.id,
                                page_no=page.page_no,
                                cluster=table_cluster,
                                label=DocItemLabel.TABLE,
                            )
-                if len(table_bboxes):
+                            page.predictions.tablestructure.table_map[
-                    tf_output = self.tf_predictor.multi_table_predict(
+                                table_cluster.id
-                        page_input, table_bboxes, do_matching=self.do_cell_matching
+                            ] = tbl
                    )
                    for table_cluster, table_out in zip(table_clusters, tf_output):
                        table_cells = []
                        for element in table_out["tf_responses"]:
                            if not self.do_cell_matching:
                                the_bbox = BoundingBox.model_validate(
                                    element["bbox"]
                                ).scaled(1 / self.scale)
                                text_piece = page._backend.get_text_in_rect(the_bbox)
                                element["bbox"]["token"] = text_piece
                            tc = TableCell.model_validate(element)
                            if self.do_cell_matching and tc.bbox is not None:
                                tc.bbox = tc.bbox.scaled(1 / self.scale)
                            table_cells.append(tc)
                        # Retrieving cols/rows, after post processing:
                        num_rows = table_out["predict_details"]["num_rows"]
                        num_cols = table_out["predict_details"]["num_cols"]
                        otsl_seq = table_out["predict_details"]["prediction"]["rs_seq"]
                        tbl = Table(
                            otsl_seq=otsl_seq,
                            table_cells=table_cells,
                            num_rows=num_rows,
                            num_cols=num_cols,
                            id=table_cluster.id,
                            page_no=page.page_no,
                            cluster=table_cluster,
                            label=DocItemLabel.TABLE,
                        )
                        page.predictions.tablestructure.table_map[table_cluster.id] = (
                            tbl
                        )
                    # For debugging purposes:
                    if settings.debug.visualize_tables:
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@ -8,8 +8,10 @@ import pandas as pd
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractCliOcrOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import TimeRecorder
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@ -103,7 +105,9 @@ class TesseractOcrCliModel(BaseOcrModel):
        return df_filtered
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        if not self.enabled:
            yield from page_batch
@ -114,60 +118,64 @@ class TesseractOcrCliModel(BaseOcrModel):
            if not page._backend.is_valid():
                yield page
            else:
-                ocr_rects = self.get_ocr_rects(page)
+                with TimeRecorder(conv_res, "ocr"):
-                all_ocr_cells = []
+                    ocr_rects = self.get_ocr_rects(page)
-                for ocr_rect in ocr_rects:
+
-                    # Skip zero area boxes
+                    all_ocr_cells = []
-                    if ocr_rect.area() == 0:
+                    for ocr_rect in ocr_rects:
-                        continue
+                        # Skip zero area boxes
-                    high_res_image = page._backend.get_page_image(
+                        if ocr_rect.area() == 0:
-                        scale=self.scale, cropbox=ocr_rect
+                            continue
                        high_res_image = page._backend.get_page_image(
                            scale=self.scale, cropbox=ocr_rect
                        )
                        with tempfile.NamedTemporaryFile(
                            suffix=".png", mode="w"
                        ) as image_file:
                            fname = image_file.name
                            high_res_image.save(fname)
                            df = self._run_tesseract(fname)
                        # _log.info(df)
                        # Print relevant columns (bounding box and text)
                        for ix, row in df.iterrows():
                            text = row["text"]
                            conf = row["conf"]
                            l = float(row["left"])
                            b = float(row["top"])
                            w = float(row["width"])
                            h = float(row["height"])
                            t = b + h
                            r = l + w
                            cell = OcrCell(
                                id=ix,
                                text=text,
                                confidence=conf / 100.0,
                                bbox=BoundingBox.from_tuple(
                                    coord=(
                                        (l / self.scale) + ocr_rect.l,
                                        (b / self.scale) + ocr_rect.t,
                                        (r / self.scale) + ocr_rect.l,
                                        (t / self.scale) + ocr_rect.t,
                                    ),
                                    origin=CoordOrigin.TOPLEFT,
                                ),
                            )
                            all_ocr_cells.append(cell)
                    ## Remove OCR cells which overlap with programmatic cells.
                    filtered_ocr_cells = self.filter_ocr_cells(
                        all_ocr_cells, page.cells
                    )
-                    with tempfile.NamedTemporaryFile(
+                    page.cells.extend(filtered_ocr_cells)
                        suffix=".png", mode="w"
                    ) as image_file:
                        fname = image_file.name
                        high_res_image.save(fname)
                        df = self._run_tesseract(fname)
                    # _log.info(df)
                    # Print relevant columns (bounding box and text)
                    for ix, row in df.iterrows():
                        text = row["text"]
                        conf = row["conf"]
                        l = float(row["left"])
                        b = float(row["top"])
                        w = float(row["width"])
                        h = float(row["height"])
                        t = b + h
                        r = l + w
                        cell = OcrCell(
                            id=ix,
                            text=text,
                            confidence=conf / 100.0,
                            bbox=BoundingBox.from_tuple(
                                coord=(
                                    (l / self.scale) + ocr_rect.l,
                                    (b / self.scale) + ocr_rect.t,
                                    (r / self.scale) + ocr_rect.l,
                                    (t / self.scale) + ocr_rect.t,
                                ),
                                origin=CoordOrigin.TOPLEFT,
                            ),
                        )
                        all_ocr_cells.append(cell)
                ## Remove OCR cells which overlap with programmatic cells.
                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
                page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
                if settings.debug.visualize_ocr:
--- a/docling/models/tesseract_ocr_model.py
+++ b/docling/models/tesseract_ocr_model.py
@ -4,8 +4,10 @@ from typing import Iterable
 from docling_core.types.doc import BoundingBox, CoordOrigin
 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import TesseractOcrOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import TimeRecorder
 from docling.models.base_ocr_model import BaseOcrModel
 _log = logging.getLogger(__name__)
@ -62,7 +64,9 @@ class TesseractOcrModel(BaseOcrModel):
            # Finalize the tesseractAPI
            self.reader.End()
-    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        if not self.enabled:
            yield from page_batch
@ -73,57 +77,63 @@ class TesseractOcrModel(BaseOcrModel):
            if not page._backend.is_valid():
                yield page
            else:
-                assert self.reader is not None
+                with TimeRecorder(conv_res, "ocr"):
-                ocr_rects = self.get_ocr_rects(page)
+                    assert self.reader is not None
-                all_ocr_cells = []
+                    ocr_rects = self.get_ocr_rects(page)
                for ocr_rect in ocr_rects:
                    # Skip zero area boxes
                    if ocr_rect.area() == 0:
                        continue
                    high_res_image = page._backend.get_page_image(
                        scale=self.scale, cropbox=ocr_rect
                    )
-                    # Retrieve text snippets with their bounding boxes
+                    all_ocr_cells = []
-                    self.reader.SetImage(high_res_image)
+                    for ocr_rect in ocr_rects:
-                    boxes = self.reader.GetComponentImages(
+                        # Skip zero area boxes
-                        self.reader_RIL.TEXTLINE, True
+                        if ocr_rect.area() == 0:
-                    )
+                            continue
-
+                        high_res_image = page._backend.get_page_image(
-                    cells = []
+                            scale=self.scale, cropbox=ocr_rect
                    for ix, (im, box, _, _) in enumerate(boxes):
                        # Set the area of interest. Tesseract uses Bottom-Left for the origin
                        self.reader.SetRectangle(box["x"], box["y"], box["w"], box["h"])
                        # Extract text within the bounding box
                        text = self.reader.GetUTF8Text().strip()
                        confidence = self.reader.MeanTextConf()
                        left = box["x"] / self.scale
                        bottom = box["y"] / self.scale
                        right = (box["x"] + box["w"]) / self.scale
                        top = (box["y"] + box["h"]) / self.scale
                        cells.append(
                            OcrCell(
                                id=ix,
                                text=text,
                                confidence=confidence,
                                bbox=BoundingBox.from_tuple(
                                    coord=(left, top, right, bottom),
                                    origin=CoordOrigin.TOPLEFT,
                                ),
                            )
                        )
-                    # del high_res_image
+                        # Retrieve text snippets with their bounding boxes
-                    all_ocr_cells.extend(cells)
+                        self.reader.SetImage(high_res_image)
                        boxes = self.reader.GetComponentImages(
                            self.reader_RIL.TEXTLINE, True
                        )
-                ## Remove OCR cells which overlap with programmatic cells.
+                        cells = []
-                filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
+                        for ix, (im, box, _, _) in enumerate(boxes):
                            # Set the area of interest. Tesseract uses Bottom-Left for the origin
                            self.reader.SetRectangle(
                                box["x"], box["y"], box["w"], box["h"]
                            )
-                page.cells.extend(filtered_ocr_cells)
+                            # Extract text within the bounding box
                            text = self.reader.GetUTF8Text().strip()
                            confidence = self.reader.MeanTextConf()
                            left = box["x"] / self.scale
                            bottom = box["y"] / self.scale
                            right = (box["x"] + box["w"]) / self.scale
                            top = (box["y"] + box["h"]) / self.scale
                            cells.append(
                                OcrCell(
                                    id=ix,
                                    text=text,
                                    confidence=confidence,
                                    bbox=BoundingBox.from_tuple(
                                        coord=(left, top, right, bottom),
                                        origin=CoordOrigin.TOPLEFT,
                                    ),
                                )
                            )
                        # del high_res_image
                        all_ocr_cells.extend(cells)
                    ## Remove OCR cells which overlap with programmatic cells.
                    filtered_ocr_cells = self.filter_ocr_cells(
                        all_ocr_cells, page.cells
                    )
                    page.cells.extend(filtered_ocr_cells)
                # DEBUG code:
                if settings.debug.visualize_ocr:
--- a/docling/pipeline/base_pipeline.py
+++ b/docling/pipeline/base_pipeline.py
@ -15,10 +15,15 @@ from docling.datamodel.base_models import (
    ErrorItem,
    Page,
 )
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import (
    ConversionResult,
    InputDocument,
    ProfilingItem,
    ProfilingScope,
 )
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.datamodel.settings import settings
-from docling.models.base_model import BaseEnrichmentModel
+from docling.models.base_model import BaseEnrichmentModel, TimeRecorder
 from docling.utils.utils import chunkify
 _log = logging.getLogger(__name__)
@ -37,11 +42,11 @@ class BasePipeline(ABC):
        try:
            # These steps are building and assembling the structure of the
            # output DoclingDocument
-            conv_res = self._build_document(in_doc, conv_res)
+            conv_res = self._build_document(conv_res)
-            conv_res = self._assemble_document(in_doc, conv_res)
+            conv_res = self._assemble_document(conv_res)
            # From this stage, all operations should rely only on conv_res.output
-            conv_res = self._enrich_document(in_doc, conv_res)
+            conv_res = self._enrich_document(conv_res)
-            conv_res.status = self._determine_status(in_doc, conv_res)
+            conv_res.status = self._determine_status(conv_res)
        except Exception as e:
            conv_res.status = ConversionStatus.FAILURE
            if raises_on_error:
@ -50,19 +55,13 @@ class BasePipeline(ABC):
        return conv_res
    @abstractmethod
-    def _build_document(
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
        pass
-    def _assemble_document(
+    def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
        return conv_res
-    def _enrich_document(
+    def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
        def _filter_elements(
            doc: DoclingDocument, model: BaseEnrichmentModel
@ -71,24 +70,23 @@ class BasePipeline(ABC):
                if model.is_processable(doc=doc, element=element):
                    yield element
-        for model in self.enrichment_pipe:
+        with TimeRecorder(conv_res, "doc_enrich", scope=ProfilingScope.DOCUMENT):
-            for element_batch in chunkify(
+            for model in self.enrichment_pipe:
-                _filter_elements(conv_res.document, model),
+                for element_batch in chunkify(
-                settings.perf.elements_batch_size,
+                    _filter_elements(conv_res.document, model),
-            ):
+                    settings.perf.elements_batch_size,
-                # TODO: currently we assume the element itself is modified, because
+                ):
-                # we don't have an interface to save the element back to the document
+                    # TODO: currently we assume the element itself is modified, because
-                for element in model(
+                    # we don't have an interface to save the element back to the document
-                    doc=conv_res.document, element_batch=element_batch
+                    for element in model(
-                ):  # Must exhaust!
+                        doc=conv_res.document, element_batch=element_batch
-                    pass
+                    ):  # Must exhaust!
                        pass
        return conv_res
    @abstractmethod
-    def _determine_status(
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionStatus:
        pass
    @classmethod
@ -110,66 +108,68 @@ class BasePipeline(ABC):
 class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
-    def _apply_on_pages(self, page_batch: Iterable[Page]) -> Iterable[Page]:
+    def _apply_on_pages(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
    ) -> Iterable[Page]:
        for model in self.build_pipe:
-            page_batch = model(page_batch)
+            page_batch = model(conv_res, page_batch)
        yield from page_batch
-    def _build_document(
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
-        if not isinstance(in_doc._backend, PdfDocumentBackend):
+        if not isinstance(conv_res.input._backend, PdfDocumentBackend):
            raise RuntimeError(
-                f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a PDF backend. "
+                f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. "
                f"Can not convert this with a PDF pipeline. "
                f"Please check your format configuration on DocumentConverter."
            )
            # conv_res.status = ConversionStatus.FAILURE
            # return conv_res
-        for i in range(0, in_doc.page_count):
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
            conv_res.pages.append(Page(page_no=i))
-        try:
+            for i in range(0, conv_res.input.page_count):
-            # Iterate batches of pages (page_batch_size) in the doc
+                conv_res.pages.append(Page(page_no=i))
            for page_batch in chunkify(conv_res.pages, settings.perf.page_batch_size):
                start_pb_time = time.time()
-                # 1. Initialise the page resources
+            try:
-                init_pages = map(
+                # Iterate batches of pages (page_batch_size) in the doc
-                    functools.partial(self.initialize_page, in_doc), page_batch
+                for page_batch in chunkify(
                    conv_res.pages, settings.perf.page_batch_size
                ):
                    start_pb_time = time.time()
                    # 1. Initialise the page resources
                    init_pages = map(
                        functools.partial(self.initialize_page, conv_res), page_batch
                    )
                    # 2. Run pipeline stages
                    pipeline_pages = self._apply_on_pages(conv_res, init_pages)
                    for p in pipeline_pages:  # Must exhaust!
                        pass
                    end_pb_time = time.time() - start_pb_time
                    _log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
            except Exception as e:
                conv_res.status = ConversionStatus.FAILURE
                trace = "\n".join(traceback.format_exception(e))
                _log.warning(
                    f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
                    f"{trace}"
                )
                raise e
-                # 2. Run pipeline stages
+            finally:
-                pipeline_pages = self._apply_on_pages(init_pages)
+                # Always unload the PDF backend, even in case of failure
-
+                if conv_res.input._backend:
-                for p in pipeline_pages:  # Must exhaust!
+                    conv_res.input._backend.unload()
                    pass
                end_pb_time = time.time() - start_pb_time
                _log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
        except Exception as e:
            conv_res.status = ConversionStatus.FAILURE
            trace = "\n".join(traceback.format_exception(e))
            _log.warning(
                f"Encountered an error during conversion of document {in_doc.document_hash}:\n"
                f"{trace}"
            )
            raise e
        finally:
            # Always unload the PDF backend, even in case of failure
            if in_doc._backend:
                in_doc._backend.unload()
        return conv_res
-    def _determine_status(
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionStatus:
        status = ConversionStatus.SUCCESS
        for page in conv_res.pages:
            if page._backend is None or not page._backend.is_valid():
@ -186,5 +186,5 @@ class PaginatedPipeline(BasePipeline):  # TODO this is a bad name.
    # Initialise and load resources for a page
    @abstractmethod
-    def initialize_page(self, doc: InputDocument, page: Page) -> Page:
+    def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
        pass
--- a/docling/pipeline/simple_pipeline.py
+++ b/docling/pipeline/simple_pipeline.py
@ -5,8 +5,9 @@ from docling.backend.abstract_backend import (
    DeclarativeDocumentBackend,
 )
 from docling.datamodel.base_models import ConversionStatus
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult, InputDocument, ProfilingScope
 from docling.datamodel.pipeline_options import PipelineOptions
 from docling.models.base_model import TimeRecorder
 from docling.pipeline.base_pipeline import BasePipeline
 _log = logging.getLogger(__name__)
@ -22,13 +23,11 @@ class SimplePipeline(BasePipeline):
    def __init__(self, pipeline_options: PipelineOptions):
        super().__init__(pipeline_options)
-    def _build_document(
+    def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
-        if not isinstance(in_doc._backend, DeclarativeDocumentBackend):
+        if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
            raise RuntimeError(
-                f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a declarative backend. "
+                f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
                f"Can not convert this with simple pipeline. "
                f"Please check your format configuration on DocumentConverter."
            )
@ -38,13 +37,11 @@ class SimplePipeline(BasePipeline):
        # Instead of running a page-level pipeline to build up the document structure,
        # the backend is expected to be of type DeclarativeDocumentBackend, which can output
        # a DoclingDocument straight.
-
+        with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
-        conv_res.document = in_doc._backend.convert()
+            conv_res.document = conv_res.input._backend.convert()
        return conv_res
-    def _determine_status(
+    def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionStatus:
        # This is called only if the previous steps didn't raise.
        # Since we don't have anything else to evaluate, we can
        # safely return SUCCESS.
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@ -7,13 +7,14 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import AssembledUnit, Page
-from docling.datamodel.document import ConversionResult, InputDocument
+from docling.datamodel.document import ConversionResult, InputDocument, ProfilingScope
 from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
    PdfPipelineOptions,
    TesseractCliOcrOptions,
    TesseractOcrOptions,
 )
 from docling.models.base_model import TimeRecorder
 from docling.models.base_ocr_model import BaseOcrModel
 from docling.models.ds_glm_model import GlmModel, GlmOptions
 from docling.models.easyocr_model import EasyOcrModel
@ -119,73 +120,75 @@ class StandardPdfPipeline(PaginatedPipeline):
            )
        return None
-    def initialize_page(self, doc: InputDocument, page: Page) -> Page:
+    def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
-        page._backend = doc._backend.load_page(page.page_no)  # type: ignore
+        with TimeRecorder(conv_res, "init_page"):
-        if page._backend is not None and page._backend.is_valid():
+            page._backend = conv_res.input._backend.load_page(page.page_no)  # type: ignore
-            page.size = page._backend.get_size()
+            if page._backend is not None and page._backend.is_valid():
                page.size = page._backend.get_size()
        return page
-    def _assemble_document(
+    def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
        self, in_doc: InputDocument, conv_res: ConversionResult
    ) -> ConversionResult:
        all_elements = []
        all_headers = []
        all_body = []
-        for p in conv_res.pages:
+        with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
-            if p.assembled is not None:
+            for p in conv_res.pages:
-                for el in p.assembled.body:
+                if p.assembled is not None:
-                    all_body.append(el)
+                    for el in p.assembled.body:
-                for el in p.assembled.headers:
+                        all_body.append(el)
-                    all_headers.append(el)
+                    for el in p.assembled.headers:
-                for el in p.assembled.elements:
+                        all_headers.append(el)
-                    all_elements.append(el)
+                    for el in p.assembled.elements:
                        all_elements.append(el)
-        conv_res.assembled = AssembledUnit(
+            conv_res.assembled = AssembledUnit(
-            elements=all_elements, headers=all_headers, body=all_body
+                elements=all_elements, headers=all_headers, body=all_body
-        )
+            )
-        conv_res.document = self.glm_model(conv_res)
+            conv_res.document = self.glm_model(conv_res)
-        # Generate page images in the output
+            # Generate page images in the output
-        if self.pipeline_options.generate_page_images:
+            if self.pipeline_options.generate_page_images:
-            for page in conv_res.pages:
+                for page in conv_res.pages:
                assert page.image is not None
                page_no = page.page_no + 1
                conv_res.document.pages[page_no].image = ImageRef.from_pil(
                    page.image, dpi=int(72 * self.pipeline_options.images_scale)
                )
        # Generate images of the requested element types
        if (
            self.pipeline_options.generate_picture_images
            or self.pipeline_options.generate_table_images
        ):
            scale = self.pipeline_options.images_scale
            for element, _level in conv_res.document.iterate_items():
                if not isinstance(element, DocItem) or len(element.prov) == 0:
                    continue
                if (
                    isinstance(element, PictureItem)
                    and self.pipeline_options.generate_picture_images
                ) or (
                    isinstance(element, TableItem)
                    and self.pipeline_options.generate_table_images
                ):
                    page_ix = element.prov[0].page_no - 1
                    page = conv_res.pages[page_ix]
                    assert page.size is not None
                    assert page.image is not None
-
+                    page_no = page.page_no + 1
-                    crop_bbox = (
+                    conv_res.document.pages[page_no].image = ImageRef.from_pil(
-                        element.prov[0]
+                        page.image, dpi=int(72 * self.pipeline_options.images_scale)
                        .bbox.scaled(scale=scale)
                        .to_top_left_origin(page_height=page.size.height * scale)
                    )
-                    cropped_im = page.image.crop(crop_bbox.as_tuple())
+            # Generate images of the requested element types
-                    element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
+            if (
                self.pipeline_options.generate_picture_images
                or self.pipeline_options.generate_table_images
            ):
                scale = self.pipeline_options.images_scale
                for element, _level in conv_res.document.iterate_items():
                    if not isinstance(element, DocItem) or len(element.prov) == 0:
                        continue
                    if (
                        isinstance(element, PictureItem)
                        and self.pipeline_options.generate_picture_images
                    ) or (
                        isinstance(element, TableItem)
                        and self.pipeline_options.generate_table_images
                    ):
                        page_ix = element.prov[0].page_no - 1
                        page = conv_res.pages[page_ix]
                        assert page.size is not None
                        assert page.image is not None
                        crop_bbox = (
                            element.prov[0]
                            .bbox.scaled(scale=scale)
                            .to_top_left_origin(page_height=page.size.height * scale)
                        )
                        cropped_im = page.image.crop(crop_bbox.as_tuple())
                        element.image = ImageRef.from_pil(
                            cropped_im, dpi=int(72 * scale)
                        )
        return conv_res