Merge pull request #504 from DS4SD/cau/layout-postprocessing

feat: Support hierarchical layout components, expose and group content in pictures, forms and key-value regions Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-07-27 20:44:16 +00:00 · 2024-12-06 12:26:34 +01:00 · 2024-12-06 12:26:34 +01:00 · b0da1a2127
commit b0da1a2127
parent 78fad801fe 84f3548d30
16 changed files with 1292 additions and 1297 deletions
--- a/docling/backend/docling_parse_backend.py
+++ b/docling/backend/docling_parse_backend.py
@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union

 import pypdfium2 as pdfium
 from docling_core.types.doc import BoundingBox, CoordOrigin, Size
-from docling_parse.docling_parse import pdf_parser_v1
+from docling_parse.pdf_parsers import pdf_parser_v1
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfPage

--- a/docling/backend/docling_parse_v2_backend.py
+++ b/docling/backend/docling_parse_v2_backend.py
@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Union

 import pypdfium2 as pdfium
 from docling_core.types.doc import BoundingBox, CoordOrigin
-from docling_parse.docling_parse import pdf_parser_v2
+from docling_parse.pdf_parsers import pdf_parser_v2
 from PIL import Image, ImageDraw
 from pypdfium2 import PdfPage

--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@ -121,6 +121,7 @@ class Cluster(BaseModel):
    bbox: BoundingBox
    confidence: float = 1.0
    cells: List[Cell] = []
+    children: List["Cluster"] = []  # Add child cluster support


 class BasePageElement(BaseModel):
@ -135,6 +136,12 @@ class LayoutPrediction(BaseModel):
    clusters: List[Cluster] = []


+# Page element for container-style layout regions. It declares no fields of
+# its own beyond BasePageElement; it exists as a distinct type so that
+# downstream code can tell container regions apart via isinstance checks.
+class ContainerElement(
+    BasePageElement
+):  # Used for Form and Key-Value-Regions, only for typing.
+    pass
+
+
 class Table(BasePageElement):
    otsl_seq: List[str]
    num_rows: int = 0
@ -174,7 +181,7 @@ class PagePredictions(BaseModel):
    equations_prediction: Optional[EquationPrediction] = None


-PageElement = Union[TextElement, Table, FigureElement]
+PageElement = Union[TextElement, Table, FigureElement, ContainerElement]


 class AssembledUnit(BaseModel):
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@ -77,6 +77,8 @@ layout_label_to_ds_type = {
    DocItemLabel.PICTURE: "figure",
    DocItemLabel.TEXT: "paragraph",
    DocItemLabel.PARAGRAPH: "paragraph",
+    DocItemLabel.FORM: DocItemLabel.FORM.value,
+    DocItemLabel.KEY_VALUE_REGION: DocItemLabel.KEY_VALUE_REGION.value,
 }

 _EMPTY_DOCLING_DOC = DoclingDocument(name="dummy")
--- a/docling/datamodel/settings.py
+++ b/docling/datamodel/settings.py
@ -31,6 +31,7 @@ class DebugSettings(BaseModel):
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
+    visualize_raw_layout: bool = False
    visualize_tables: bool = False

    profile_pipeline_timings: bool = False
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@ -10,6 +10,7 @@ from pydantic import BaseModel, ConfigDict, model_validator, validate_call
 from docling.backend.abstract_backend import AbstractDocumentBackend
 from docling.backend.asciidoc_backend import AsciiDocBackend
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
+from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.html_backend import HTMLDocumentBackend
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.backend.msexcel_backend import MsExcelDocumentBackend
@ -84,7 +85,7 @@ class HTMLFormatOption(FormatOption):

 class PdfFormatOption(FormatOption):
    pipeline_cls: Type = StandardPdfPipeline
-    backend: Type[AbstractDocumentBackend] = DoclingParseDocumentBackend
+    backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend


 class ImageFormatOption(FormatOption):
--- a/docling/models/ds_glm_model.py
+++ b/docling/models/ds_glm_model.py
@ -4,7 +4,6 @@ from pathlib import Path
 from typing import List, Union

 from deepsearch_glm.nlp_utils import init_nlp_model
-from deepsearch_glm.utils.doc_utils import to_docling_document
 from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
 from docling_core.types.doc import BoundingBox, CoordOrigin, DoclingDocument
 from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox
@ -24,11 +23,18 @@ from docling_core.types.legacy_doc.document import (
 from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject
 from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
 from PIL import ImageDraw
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, TypeAdapter

-from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
+from docling.datamodel.base_models import (
+    Cluster,
+    ContainerElement,
+    FigureElement,
+    Table,
+    TextElement,
+)
 from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
 from docling.datamodel.settings import settings
+from docling.utils.glm_utils import to_docling_document
 from docling.utils.profiling import ProfilingScope, TimeRecorder
 from docling.utils.utils import create_hash

@ -45,7 +51,9 @@ class GlmModel:

        if self.options.model_names != "":
            load_pretrained_nlp_models()
-        self.model = init_nlp_model(model_names=self.options.model_names)
+        self.model = init_nlp_model(
+            model_names=self.options.model_names, loglevel="ERROR"
+        )

    def _to_legacy_document(self, conv_res) -> DsDocument:
        title = ""
@ -207,7 +215,31 @@ class GlmModel:
                            )
                        ],
                        obj_type=layout_label_to_ds_type.get(element.label),
-                        # data=[[]],
+                        payload={
+                            "children": TypeAdapter(List[Cluster]).dump_python(
+                                element.cluster.children
+                            )
+                        },  # hack to channel child clusters through GLM
+                    )
+                )
+            elif isinstance(element, ContainerElement):
+                main_text.append(
+                    BaseText(
+                        text="",
+                        payload={
+                            "children": TypeAdapter(List[Cluster]).dump_python(
+                                element.cluster.children
+                            )
+                        },  # hack to channel child clusters through GLM
+                        obj_type=layout_label_to_ds_type.get(element.label),
+                        name=element.label,
+                        prov=[
+                            Prov(
+                                bbox=target_bbox,
+                                page=element.page_no + 1,
+                                span=[0, 0],
+                            )
+                        ],
                    )
                )

@ -232,7 +264,7 @@ class GlmModel:
    def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
        with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
            ds_doc = self._to_legacy_document(conv_res)
-            ds_doc_dict = ds_doc.model_dump(by_alias=True)
+            ds_doc_dict = ds_doc.model_dump(by_alias=True, exclude_none=True)

            glm_doc = self.model.apply_on_doc(ds_doc_dict)

--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@ -7,7 +7,7 @@ from typing import Iterable, List

 from docling_core.types.doc import CoordOrigin, DocItemLabel
 from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
-from PIL import ImageDraw
+from PIL import Image, ImageDraw

 from docling.datamodel.base_models import (
    BoundingBox,
@ -19,7 +19,7 @@ from docling.datamodel.base_models import (
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
-from docling.utils import layout_utils as lu
+from docling.utils.layout_postprocessor import LayoutPostprocessor
 from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@ -46,233 +46,108 @@ class LayoutModel(BasePageModel):
    FIGURE_LABEL = DocItemLabel.PICTURE
    FORMULA_LABEL = DocItemLabel.FORMULA

+    CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
+
    def __init__(self, artifacts_path: Path):
        self.layout_predictor = LayoutPredictor(artifacts_path)  # TODO temporary

-    def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
-        MIN_INTERSECTION = 0.2
-        CLASS_THRESHOLDS = {
-            DocItemLabel.CAPTION: 0.35,
-            DocItemLabel.FOOTNOTE: 0.35,
-            DocItemLabel.FORMULA: 0.35,
-            DocItemLabel.LIST_ITEM: 0.35,
-            DocItemLabel.PAGE_FOOTER: 0.35,
-            DocItemLabel.PAGE_HEADER: 0.35,
-            DocItemLabel.PICTURE: 0.2,  # low threshold adjust to capture chemical structures for examples.
-            DocItemLabel.SECTION_HEADER: 0.45,
-            DocItemLabel.TABLE: 0.35,
-            DocItemLabel.TEXT: 0.45,
-            DocItemLabel.TITLE: 0.45,
-            DocItemLabel.DOCUMENT_INDEX: 0.45,
-            DocItemLabel.CODE: 0.45,
-            DocItemLabel.CHECKBOX_SELECTED: 0.45,
-            DocItemLabel.CHECKBOX_UNSELECTED: 0.45,
-            DocItemLabel.FORM: 0.45,
-            DocItemLabel.KEY_VALUE_REGION: 0.45,
+    def draw_clusters_and_cells_side_by_side(
+        self, conv_res, page, clusters, mode_prefix: str, show: bool = False
+    ):
+        """Save (or show) a debug rendering of the page with its layout clusters.
+
+        Two copies of the page image are placed side by side:
+        - Left: Clusters excluding FORM, KEY_VALUE_REGION, and PICTURE.
+        - Right: Only FORM, KEY_VALUE_REGION, and PICTURE clusters.
+        Each top-level cluster is drawn together with its direct children and
+        the cells assigned to them.
+
+        Args:
+            conv_res: Conversion result; its input file stem names the output
+                folder below the configured debug output path.
+            page: Page whose ``image`` is used as the drawing background.
+            clusters: Top-level clusters to draw.
+            mode_prefix: Prefix of the output file name (e.g. "raw").
+            show: If True, open the image in a viewer instead of saving it.
+        """
+        if page.image is None:
+            # No rendered page image to draw on; skip the debug output
+            # instead of failing inside ImageDraw.
+            return
+
+        # Neutral gray for labels that have no entry in the color map below.
+        fallback_color = (128, 128, 128)
+        label_to_color = {
+            DocItemLabel.TEXT: (255, 255, 153),  # Light Yellow
+            DocItemLabel.CAPTION: (255, 204, 153),  # Light Orange
+            DocItemLabel.LIST_ITEM: (153, 153, 255),  # Light Purple
+            DocItemLabel.FORMULA: (192, 192, 192),  # Gray
+            DocItemLabel.TABLE: (255, 204, 204),  # Light Pink
+            DocItemLabel.PICTURE: (255, 204, 164),  # Light Beige
+            DocItemLabel.SECTION_HEADER: (255, 153, 153),  # Light Red
+            DocItemLabel.PAGE_HEADER: (204, 255, 204),  # Light Green
+            DocItemLabel.PAGE_FOOTER: (
+                204,
+                255,
+                204,
+            ),  # Light Green (same as Page-Header)
+            DocItemLabel.TITLE: (255, 153, 153),  # Light Red (same as Section-Header)
+            DocItemLabel.FOOTNOTE: (200, 200, 255),  # Light Blue
+            DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),  # Light Gray
+            DocItemLabel.CODE: (255, 223, 186),  # Peach
+            DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193),  # Light Pink
+            DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193),  # Light Pink
+            DocItemLabel.FORM: (200, 255, 255),  # Light Cyan
+            DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),  # Rusty orange
        }

-        CLASS_REMAPPINGS = {
-            DocItemLabel.DOCUMENT_INDEX: DocItemLabel.TABLE,
-            DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
+        # Filter clusters for left and right images
+        exclude_labels = {
+            DocItemLabel.FORM,
+            DocItemLabel.KEY_VALUE_REGION,
+            DocItemLabel.PICTURE,
        }
+        left_clusters = [c for c in clusters if c.label not in exclude_labels]
+        right_clusters = [c for c in clusters if c.label in exclude_labels]

-        _log.debug("================= Start postprocess function ====================")
-        start_time = time.time()
-        # Apply Confidence Threshold to cluster predictions
-        # confidence = self.conf_threshold
-        clusters_mod = []
+        # Create a deep copy of the original image for both sides
+        left_image = copy.deepcopy(page.image)
+        right_image = copy.deepcopy(page.image)

-        for cluster in clusters_in:
-            confidence = CLASS_THRESHOLDS[cluster.label]
-            if cluster.confidence >= confidence:
-                # annotation["created_by"] = "high_conf_pred"
+        # Function to draw clusters on an image
+        def draw_clusters(image, clusters):
+            draw = ImageDraw.Draw(image, "RGBA")
+            for c_tl in clusters:
+                all_clusters = [c_tl, *c_tl.children]
+                for c in all_clusters:
+                    cell_color = (0, 0, 0, 40)  # Transparent black for cells
+                    for tc in c.cells:
+                        cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
+                        draw.rectangle(
+                            [(cx0, cy0), (cx1, cy1)],
+                            outline=None,
+                            fill=cell_color,
+                        )

-                # Remap class labels where needed.
-                if cluster.label in CLASS_REMAPPINGS.keys():
-                    cluster.label = CLASS_REMAPPINGS[cluster.label]
-                clusters_mod.append(cluster)
+                    x0, y0, x1, y1 = c.bbox.as_tuple()
+                    # Unmapped labels fall back to gray instead of raising.
+                    base_color = label_to_color.get(c.label, fallback_color)
+                    cluster_fill_color = (*base_color, 70)
+                    cluster_outline_color = (*base_color, 255)
+                    draw.rectangle(
+                        [(x0, y0), (x1, y1)],
+                        outline=cluster_outline_color,
+                        fill=cluster_fill_color,
+                    )

-        # map to dictionary clusters and cells, with bottom left origin
-        clusters_orig = [
-            {
-                "id": c.id,
-                "bbox": list(
-                    c.bbox.to_bottom_left_origin(page_height).as_tuple()
-                ),  # TODO
-                "confidence": c.confidence,
-                "cell_ids": [],
-                "type": c.label,
-            }
-            for c in clusters_in
-        ]
+        # Draw clusters on both images
+        draw_clusters(left_image, left_clusters)
+        draw_clusters(right_image, right_clusters)

-        clusters_out = [
-            {
-                "id": c.id,
-                "bbox": list(
-                    c.bbox.to_bottom_left_origin(page_height).as_tuple()
-                ),  # TODO
-                "confidence": c.confidence,
-                "created_by": "high_conf_pred",
-                "cell_ids": [],
-                "type": c.label,
-            }
-            for c in clusters_mod
-        ]
+        # Combine the images side by side
+        combined_width = left_image.width * 2
+        combined_height = left_image.height
+        combined_image = Image.new("RGB", (combined_width, combined_height))
+        combined_image.paste(left_image, (0, 0))
+        combined_image.paste(right_image, (left_image.width, 0))

-        del clusters_mod
-
-        raw_cells = [
-            {
-                "id": c.id,
-                "bbox": list(
-                    c.bbox.to_bottom_left_origin(page_height).as_tuple()
-                ),  # TODO
-                "text": c.text,
-            }
-            for c in cells
-        ]
-        cell_count = len(raw_cells)
-
-        _log.debug("---- 0. Treat cluster overlaps ------")
-        clusters_out = lu.remove_cluster_duplicates_by_conf(clusters_out, 0.8)
-
-        _log.debug(
-            "---- 1. Initially assign cells to clusters based on minimum intersection ------"
-        )
-        ## Check for cells included in or touched by clusters:
-        clusters_out = lu.assigning_cell_ids_to_clusters(
-            clusters_out, raw_cells, MIN_INTERSECTION
-        )
-
-        _log.debug("---- 2. Assign Orphans with Low Confidence Detections")
-        # Creates a map of cell_id->cluster_id
-        (
-            clusters_around_cells,
-            orphan_cell_indices,
-            ambiguous_cell_indices,
-        ) = lu.cell_id_state_map(clusters_out, cell_count)
-
-        # Assign orphan cells with lower confidence predictions
-        clusters_out, orphan_cell_indices = lu.assign_orphans_with_low_conf_pred(
-            clusters_out, clusters_orig, raw_cells, orphan_cell_indices
-        )
-
-        # Refresh the cell_ids assignment, after creating new clusters using low conf predictions
-        clusters_out = lu.assigning_cell_ids_to_clusters(
-            clusters_out, raw_cells, MIN_INTERSECTION
-        )
-
-        _log.debug("---- 3. Settle Ambigous Cells")
-        # Creates an update map after assignment of cell_id->cluster_id
-        (
-            clusters_around_cells,
-            orphan_cell_indices,
-            ambiguous_cell_indices,
-        ) = lu.cell_id_state_map(clusters_out, cell_count)
-
-        # Settle pdf cells that belong to multiple clusters
-        clusters_out, ambiguous_cell_indices = lu.remove_ambigous_pdf_cell_by_conf(
-            clusters_out, raw_cells, ambiguous_cell_indices
-        )
-
-        _log.debug("---- 4. Set Orphans as Text")
-        (
-            clusters_around_cells,
-            orphan_cell_indices,
-            ambiguous_cell_indices,
-        ) = lu.cell_id_state_map(clusters_out, cell_count)
-
-        clusters_out, orphan_cell_indices = lu.set_orphan_as_text(
-            clusters_out, clusters_orig, raw_cells, orphan_cell_indices
-        )
-
-        _log.debug("---- 5. Merge Cells & and adapt the bounding boxes")
-        # Merge cells orphan cells
-        clusters_out = lu.merge_cells(clusters_out)
-
-        # Clean up clusters that remain from merged and unreasonable clusters
-        clusters_out = lu.clean_up_clusters(
-            clusters_out,
-            raw_cells,
-            merge_cells=True,
-            img_table=True,
-            one_cell_table=True,
-        )
-
-        new_clusters = lu.adapt_bboxes(raw_cells, clusters_out, orphan_cell_indices)
-        clusters_out = new_clusters
-
-        ## We first rebuild where every cell is now:
-        ##   Now we write into a prediction cells list, not into the raw cells list.
-        ##   As we don't need previous labels, we best overwrite any old list, because that might
-        ##   have been sorted differently.
-        (
-            clusters_around_cells,
-            orphan_cell_indices,
-            ambiguous_cell_indices,
-        ) = lu.cell_id_state_map(clusters_out, cell_count)
-
-        target_cells = []
-        for ix, cell in enumerate(raw_cells):
-            new_cell = {
-                "id": ix,
-                "rawcell_id": ix,
-                "label": "None",
-                "bbox": cell["bbox"],
-                "text": cell["text"],
-            }
-            for cluster_index in clusters_around_cells[
-                ix
-            ]:  # By previous analysis, this is always 1 cluster.
-                new_cell["label"] = clusters_out[cluster_index]["type"]
-            target_cells.append(new_cell)
-            # _log.debug("New label of cell " + str(ix) + " is " + str(new_cell["label"]))
-        cells_out = target_cells
-
-        ## -------------------------------
-        ## Sort clusters into reasonable reading order, and sort the cells inside each cluster
-        _log.debug("---- 5. Sort clusters in reading order ------")
-        sorted_clusters = lu.produce_reading_order(
-            clusters_out, "raw_cell_ids", "raw_cell_ids", True
-        )
-        clusters_out = sorted_clusters
-
-        # end_time = timer()
-        _log.debug("---- End of postprocessing function ------")
-        end_time = time.time() - start_time
-        _log.debug(f"Finished post processing in seconds={end_time:.3f}")
-
-        cells_out_new = [
-            Cell(
-                id=c["id"],  # type: ignore
-                bbox=BoundingBox.from_tuple(
-                    coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT  # type: ignore
-                ).to_top_left_origin(page_height),
-                text=c["text"],  # type: ignore
+        if show:
+            combined_image.show()
+        else:
+            out_path: Path = (
+                Path(settings.debug.debug_output_path)
+                / f"debug_{conv_res.input.file.stem}"
            )
-            for c in cells_out
-        ]
+            out_path.mkdir(parents=True, exist_ok=True)

-        del cells_out
-
-        clusters_out_new = []
-        for c in clusters_out:
-            cluster_cells = [
-                ccell for ccell in cells_out_new if ccell.id in c["cell_ids"]  # type: ignore
-            ]
-            c_new = Cluster(
-                id=c["id"],  # type: ignore
-                bbox=BoundingBox.from_tuple(
-                    coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT  # type: ignore
-                ).to_top_left_origin(page_height),
-                confidence=c["confidence"],  # type: ignore
-                label=DocItemLabel(c["type"]),
-                cells=cluster_cells,
-            )
-            clusters_out_new.append(c_new)
-
-        return clusters_out_new, cells_out_new
+            out_file = out_path / f"{mode_prefix}_layout_page_{page.page_no:05}.png"
+            combined_image.save(str(out_file), format="png")

    def __call__(
        self, conv_res: ConversionResult, page_batch: Iterable[Page]
@ -305,43 +180,78 @@ class LayoutModel(BasePageModel):
                        )
                        clusters.append(cluster)

-                    # Map cells to clusters
-                    # TODO: Remove, postprocess should take care of it anyway.
-                    for cell in page.cells:
-                        for cluster in clusters:
-                            if not cell.bbox.area() > 0:
-                                overlap_frac = 0.0
-                            else:
-                                overlap_frac = (
-                                    cell.bbox.intersection_area_with(cluster.bbox)
-                                    / cell.bbox.area()
-                                )
-
-                            if overlap_frac > 0.5:
-                                cluster.cells.append(cell)
-
-                    # Pre-sort clusters
-                    # clusters = self.sort_clusters_by_cell_order(clusters)
-
                    # DEBUG code:
-                    def draw_clusters_and_cells(show: bool = False):
+                    def draw_clusters_and_cells(
+                        clusters, mode_prefix: str, show: bool = False
+                    ):
+                        label_to_color = {
+                            DocItemLabel.TEXT: (255, 255, 153),  # Light Yellow
+                            DocItemLabel.CAPTION: (255, 204, 153),  # Light Orange
+                            DocItemLabel.LIST_ITEM: (153, 153, 255),  # Light Purple
+                            DocItemLabel.FORMULA: (192, 192, 192),  # Gray
+                            DocItemLabel.TABLE: (255, 204, 204),  # Light Pink
+                            DocItemLabel.PICTURE: (255, 255, 204),  # Light Beige
+                            DocItemLabel.SECTION_HEADER: (255, 153, 153),  # Light Red
+                            DocItemLabel.PAGE_HEADER: (204, 255, 204),  # Light Green
+                            DocItemLabel.PAGE_FOOTER: (
+                                204,
+                                255,
+                                204,
+                            ),  # Light Green (same as Page-Header)
+                            DocItemLabel.TITLE: (
+                                255,
+                                153,
+                                153,
+                            ),  # Light Red (same as Section-Header)
+                            DocItemLabel.FOOTNOTE: (200, 200, 255),  # Light Blue
+                            DocItemLabel.DOCUMENT_INDEX: (220, 220, 220),  # Light Gray
+                            DocItemLabel.CODE: (255, 223, 186),  # Peach
+                            DocItemLabel.CHECKBOX_SELECTED: (
+                                255,
+                                182,
+                                193,
+                            ),  # Light Pink
+                            DocItemLabel.CHECKBOX_UNSELECTED: (
+                                255,
+                                182,
+                                193,
+                            ),  # Light Pink
+                            DocItemLabel.FORM: (200, 255, 255),  # Light Cyan
+                            DocItemLabel.KEY_VALUE_REGION: (
+                                183,
+                                65,
+                                14,
+                            ),  # Rusty orange
+                        }
+
                        image = copy.deepcopy(page.image)
                        if image is not None:
-                            draw = ImageDraw.Draw(image)
+                            draw = ImageDraw.Draw(image, "RGBA")
                            for c in clusters:
-                                x0, y0, x1, y1 = c.bbox.as_tuple()
-                                draw.rectangle([(x0, y0), (x1, y1)], outline="green")
-
-                                cell_color = (
-                                    random.randint(30, 140),
-                                    random.randint(30, 140),
-                                    random.randint(30, 140),
-                                )
+                                cell_color = (0, 0, 0, 40)
                                for tc in c.cells:  # [:1]:
-                                    x0, y0, x1, y1 = tc.bbox.as_tuple()
+                                    cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
                                    draw.rectangle(
-                                        [(x0, y0), (x1, y1)], outline=cell_color
+                                        [(cx0, cy0), (cx1, cy1)],
+                                        outline=None,
+                                        fill=cell_color,
                                    )
+
+                                x0, y0, x1, y1 = c.bbox.as_tuple()
+                                cluster_fill_color = (
+                                    *list(label_to_color.get(c.label)),  # type: ignore
+                                    70,
+                                )
+                                cluster_outline_color = (
+                                    *list(label_to_color.get(c.label)),  # type: ignore
+                                    255,
+                                )
+                                draw.rectangle(
+                                    [(x0, y0), (x1, y1)],
+                                    outline=cluster_outline_color,
+                                    fill=cluster_fill_color,
+                                )
+
                            if show:
                                image.show()
                            else:
@ -352,19 +262,30 @@ class LayoutModel(BasePageModel):
                                out_path.mkdir(parents=True, exist_ok=True)

                                out_file = (
-                                    out_path / f"layout_page_{page.page_no:05}.png"
+                                    out_path
+                                    / f"{mode_prefix}_layout_page_{page.page_no:05}.png"
                                )
                                image.save(str(out_file), format="png")

-                    # draw_clusters_and_cells()
+                    if settings.debug.visualize_raw_layout:
+                        self.draw_clusters_and_cells_side_by_side(
+                            conv_res, page, clusters, mode_prefix="raw"
+                        )

-                    clusters, page.cells = self.postprocess(
-                        clusters, page.cells, page.size.height
+                    # Apply postprocessing
+                    processed_clusters, processed_cells = LayoutPostprocessor(
+                        page.cells, clusters
+                    ).postprocess()
+                    # processed_clusters, processed_cells = clusters, page.cells
+
+                    page.cells = processed_cells
+                    page.predictions.layout = LayoutPrediction(
+                        clusters=processed_clusters
                    )

-                    page.predictions.layout = LayoutPrediction(clusters=clusters)
-
                if settings.debug.visualize_layout:
-                    draw_clusters_and_cells()
+                    self.draw_clusters_and_cells_side_by_side(
+                        conv_res, page, processed_clusters, mode_prefix="postprocessed"
+                    )

                yield page
--- a/docling/models/page_assemble_model.py
+++ b/docling/models/page_assemble_model.py
@ -6,6 +6,7 @@ from pydantic import BaseModel

 from docling.datamodel.base_models import (
    AssembledUnit,
+    ContainerElement,
    FigureElement,
    Page,
    PageElement,
@ -159,6 +160,15 @@ class PageAssembleModel(BasePageModel):
                                )
                            elements.append(equation)
                            body.append(equation)
+                        elif cluster.label in LayoutModel.CONTAINER_LABELS:
+                            container_el = ContainerElement(
+                                label=cluster.label,
+                                id=cluster.id,
+                                page_no=page.page_no,
+                                cluster=cluster,
+                            )
+                            elements.append(container_el)
+                            body.append(container_el)

                    page.assembled = AssembledUnit(
                        elements=elements, headers=headers, body=body
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)


 class StandardPdfPipeline(PaginatedPipeline):
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
+    _layout_model_path = "model_artifacts/layout"
    _table_model_path = "model_artifacts/tableformer"

    def __init__(self, pipeline_options: PdfPipelineOptions):
@ -102,7 +102,7 @@ class StandardPdfPipeline(PaginatedPipeline):
            repo_id="ds4sd/docling-models",
            force_download=force,
            local_dir=local_dir,
-            revision="v2.0.1",
+            revision="refs/pr/2",
        )

        return Path(download_path)
--- a/docling/utils/glm_utils.py
+++ b/docling/utils/glm_utils.py
@ -0,0 +1,336 @@
+import re
+from pathlib import Path
+from typing import List
+
+import pandas as pd
+from docling_core.types.doc import (
+    BoundingBox,
+    CoordOrigin,
+    DocItemLabel,
+    DoclingDocument,
+    DocumentOrigin,
+    GroupLabel,
+    ProvenanceItem,
+    Size,
+    TableCell,
+    TableData,
+)
+
+
+def resolve_item(paths, obj):
+    """Find item in document from a reference path.
+
+    Args:
+        paths: Sequence of path segments, e.g. ``"#/tables/0".split("/")``.
+            ``"#"`` segments are skipped; segments that parse as integers are
+            used as sequence indices, all others as mapping keys.
+        obj: The (nested) container to walk, typically the document dict.
+
+    Returns:
+        The referenced item, ``obj`` itself for an empty path, or ``None`` when
+        any segment cannot be resolved (missing key, index out of range,
+        negative index, or a container that does not support the lookup).
+    """
+    current = obj
+    for part in paths:
+        if part == "#":
+            # "#" only marks the document root; there is nothing to look up.
+            continue
+
+        try:
+            key = int(part)
+        except (TypeError, ValueError):
+            key = part
+
+        try:
+            if isinstance(key, int):
+                # Reject negative indices explicitly: they would otherwise
+                # silently resolve from the end of the sequence.
+                if not 0 <= key < len(current):
+                    return None
+            elif key not in current:
+                return None
+            current = current[key]
+        except (IndexError, KeyError, TypeError):
+            # E.g. an integer segment applied to a mapping, or a lookup on a
+            # scalar/None value: the reference is simply unresolvable.
+            return None
+
+    return current
+
+
+def _flatten_table_grid(grid: List[List[dict]]) -> List[dict]:
+    """Flatten a table grid into a list of cells, dropping repeated spans.
+
+    Cells are visited row by row; a cell is kept only if no earlier cell had
+    the same ``"spans"`` value, so the first occurrence wins and the original
+    order is preserved.
+    """
+    flattened: List[dict] = []
+    known_spans = set()
+
+    for row in grid:
+        for cell in row:
+            # Lists are unhashable, so freeze the spans into nested tuples.
+            span_key = tuple(map(tuple, cell["spans"]))
+            if span_key in known_spans:
+                continue
+            known_spans.add(span_key)
+            flattened.append(cell)
+
+    return flattened
+
+
+# Page elements that are captions of a figure/table are emitted together with
+# their figure/table, so their own page-element entries are skipped.
+_CAPTION_IREF_PATTERN = re.compile(r"#/(?:figures|tables)/\d+/captions/.+")
+
+
+def _page_element_prov(elem, charspan):
+    """Build the provenance for a page element; its bbox is bottom-left based."""
+    return ProvenanceItem(
+        page_no=elem["page"],
+        charspan=charspan,
+        bbox=BoundingBox.from_tuple(elem["bbox"], origin=CoordOrigin.BOTTOMLEFT),
+    )
+
+
+def _add_caption_items(doc, doc_glm, obj):
+    """Add the captions of a figure/table object to ``doc`` as caption texts.
+
+    Returns a ``(text, refs)`` pair: the concatenated text of all captions and
+    the references of the caption items that were added.
+    """
+    text = ""
+    caption_refs = []
+    for caption in obj["captions"]:
+        text += caption["text"]
+
+        for nprov in caption["prov"]:
+            nelem = resolve_item(nprov["$ref"].split("/"), doc_glm)
+            if nelem is None:
+                # Dangling caption reference: nothing to attach.
+                continue
+
+            span_i, span_j = nelem["span"][0], nelem["span"][1]
+            caption_obj = doc.add_text(
+                label=DocItemLabel.CAPTION,
+                text=caption["text"][span_i:span_j],
+                prov=_page_element_prov(nelem, tuple(nelem["span"])),
+            )
+            caption_refs.append(caption_obj.get_ref())
+
+    return text, caption_refs
+
+
+def _to_table_cell(cell_glm):
+    """Convert one table cell dict of the source document into a ``TableCell``."""
+    bbox = None
+    if cell_glm["bbox"] is not None:
+        bbox = BoundingBox.from_tuple(cell_glm["bbox"], origin=CoordOrigin.BOTTOMLEFT)
+
+    row_start, row_end = cell_glm["row-span"][0], cell_glm["row-span"][1]
+    col_start, col_end = cell_glm["col-span"][0], cell_glm["col-span"][1]
+    cell_type = cell_glm["type"]
+
+    return TableCell(
+        row_span=row_end - row_start,
+        col_span=col_end - col_start,
+        start_row_offset_idx=row_start,
+        end_row_offset_idx=row_end,
+        start_col_offset_idx=col_start,
+        end_col_offset_idx=col_end,
+        text=cell_glm["text"],
+        bbox=bbox,
+        column_header=cell_type == "col_header",
+        row_header=cell_type == "row_header",
+        row_section=cell_type == "row_section",
+    )
+
+
+def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument:
+    """Convert a document dictionary (``doc_glm``) into a ``DoclingDocument``.
+
+    Args:
+        doc_glm: Mapping with the keys ``"file-info"``, ``"page-dimensions"``
+            and ``"page-elements"`` (plus optionally ``"properties"``) and the
+            containers that the page elements' ``"iref"`` paths point into.
+        update_name_label: When true, a paragraph's label is replaced by the
+            label of its single matching "semantic" property if that property
+            has a confidence above 0.85.
+
+    Returns:
+        The populated ``DoclingDocument``. Page elements without an ``"iref"``,
+        caption page elements and elements whose reference cannot be resolved
+        are skipped.
+    """
+    import logging
+
+    origin = DocumentOrigin(
+        mimetype="application/pdf",
+        filename=doc_glm["file-info"]["filename"],
+        binary_hash=doc_glm["file-info"]["document-hash"],
+    )
+    doc: DoclingDocument = DoclingDocument(
+        name=Path(origin.filename).stem, origin=origin
+    )
+
+    for page_dim in doc_glm["page-dimensions"]:
+        doc.add_page(
+            page_no=int(page_dim["page"]),
+            size=Size(width=page_dim["width"], height=page_dim["height"]),
+        )
+
+    if "properties" in doc_glm:
+        props = pd.DataFrame(
+            doc_glm["properties"]["data"], columns=doc_glm["properties"]["headers"]
+        )
+    else:
+        props = pd.DataFrame()
+
+    # Group that consecutive list items are appended to; any other element
+    # ends the running list.
+    current_list = None
+
+    for pelem in doc_glm["page-elements"]:
+        ptype = pelem["type"]
+
+        if "iref" not in pelem:
+            continue
+
+        iref = pelem["iref"]
+
+        if _CAPTION_IREF_PATTERN.match(iref):
+            continue
+
+        path = iref.split("/")
+        obj = resolve_item(path, doc_glm)
+
+        if obj is None:
+            current_list = None
+            logging.getLogger(__name__).warning("undefined reference %s", path)
+            continue
+
+        if ptype == "figure":
+            current_list = None
+            text, caption_refs = _add_caption_items(doc, doc_glm, obj)
+
+            pic = doc.add_picture(prov=_page_element_prov(pelem, (0, len(text))))
+            pic.captions.extend(caption_refs)
+            _add_child_elements(pic, doc, obj, pelem)
+
+        elif ptype == "table":
+            current_list = None
+            _, caption_refs = _add_caption_items(doc, doc_glm, obj)
+
+            tbl_data = TableData(
+                num_rows=obj.get("#-rows", 0),
+                num_cols=obj.get("#-cols", 0),
+                table_cells=[
+                    _to_table_cell(cell_glm)
+                    for cell_glm in _flatten_table_grid(obj["data"])
+                ],
+            )
+
+            tbl = doc.add_table(data=tbl_data, prov=_page_element_prov(pelem, (0, 0)))
+            tbl.captions.extend(caption_refs)
+
+        elif ptype in ["form", "key_value_region"]:
+            # A container interrupts a running list just like figures/tables
+            # do; otherwise later list items would join the list before it.
+            current_list = None
+            label = DocItemLabel(ptype)
+            container_el = doc.add_group(label=GroupLabel.UNSPECIFIED, name=label)
+
+            _add_child_elements(container_el, doc, obj, pelem)
+
+        elif "text" in obj:
+            # The span is only meaningful for text elements, so it is read here
+            # rather than for every page element.
+            span_i, span_j = pelem["span"][0], pelem["span"][1]
+            text = obj["text"][span_i:span_j]
+
+            name_label = pelem["name"]
+            if update_name_label and len(props) > 0 and ptype == "paragraph":
+                prop = props[
+                    (props["type"] == "semantic") & (props["subj_path"] == iref)
+                ]
+                if len(prop) == 1 and prop.iloc[0]["confidence"] > 0.85:
+                    name_label = prop.iloc[0]["label"]
+
+            prov = _page_element_prov(pelem, (0, len(text)))
+            label = DocItemLabel(name_label)
+
+            if label == DocItemLabel.LIST_ITEM:
+                if current_list is None:
+                    current_list = doc.add_group(label=GroupLabel.LIST, name="list")
+
+                # TODO: Infer if this is a numbered or a bullet list item
+                doc.add_list_item(
+                    text=text, enumerated=False, prov=prov, parent=current_list
+                )
+            elif label == DocItemLabel.SECTION_HEADER:
+                current_list = None
+
+                doc.add_heading(text=text, prov=prov)
+            else:
+                current_list = None
+
+                doc.add_text(label=label, text=text, prov=prov)
+
+    return doc
+
+
+def _add_child_elements(container_el, doc, obj, pelem):
+    """Add the children listed under ``obj["payload"]["children"]`` to ``doc``.
+
+    Every child becomes a list item, a heading or a plain text item (depending
+    on its label) parented to ``container_el``. Nothing happens when ``obj``
+    has no payload.
+    """
+    payload = obj.get("payload")
+    if payload is None:
+        return
+
+    for child in payload.get("children", []):
+        child_label = DocItemLabel(child["label"])
+
+        # Child boxes are validated as-is and then converted to bottom-left
+        # origin using the height of the page the container sits on.
+        raw_bbox = BoundingBox.model_validate(child["bbox"])
+        child_bbox = raw_bbox.to_bottom_left_origin(
+            doc.pages[pelem["page"]].size.height
+        )
+
+        # Join the non-blank cell texts, mapping the \x02 control character
+        # to a hyphen.
+        fragments = []
+        for cell in child["cells"]:
+            if len(cell["text"].strip()) > 0:
+                fragments.append(cell["text"].replace("\x02", "-").strip())
+        child_text = " ".join(fragments)
+
+        child_prov = ProvenanceItem(
+            page_no=pelem["page"], charspan=(0, len(child_text)), bbox=child_bbox
+        )
+
+        if child_label == DocItemLabel.LIST_ITEM:
+            # TODO: Infer if this is a numbered or a bullet list item
+            doc.add_list_item(parent=container_el, text=child_text, prov=child_prov)
+        elif child_label == DocItemLabel.SECTION_HEADER:
+            doc.add_heading(parent=container_el, text=child_text, prov=child_prov)
+        else:
+            doc.add_text(
+                parent=container_el,
+                label=child_label,
+                text=child_text,
+                prov=child_prov,
+            )
--- a/docling/utils/layout_postprocessor.py
+++ b/docling/utils/layout_postprocessor.py
@ -0,0 +1,496 @@
+import bisect
+import logging
+import sys
+from collections import defaultdict
+from typing import Dict, List, Set, Tuple
+
+from docling_core.types.doc import DocItemLabel
+from rtree import index
+
+from docling.datamodel.base_models import BoundingBox, Cell, Cluster
+
+_log = logging.getLogger(__name__)
+
+
+class UnionFind:
+    """Disjoint-set structure with union by rank and path compression."""
+
+    def __init__(self, elements):
+        # Every element starts out as the root of its own singleton set.
+        self.parent = {item: item for item in elements}
+        self.rank = dict.fromkeys(elements, 0)
+
+    def find(self, x):
+        """Return the root of the set containing ``x``, compressing the path."""
+        root = x
+        while self.parent[root] != root:
+            root = self.parent[root]
+
+        # Second pass: point every node on the walked path directly at the root.
+        while self.parent[x] != root:
+            self.parent[x], x = root, self.parent[x]
+
+        return root
+
+    def union(self, x, y):
+        """Merge the sets containing ``x`` and ``y`` (no-op if already merged)."""
+        keep, absorb = self.find(x), self.find(y)
+        if keep == absorb:
+            return
+
+        # Attach the lower-ranked root below the higher-ranked one.
+        if self.rank[keep] < self.rank[absorb]:
+            keep, absorb = absorb, keep
+
+        self.parent[absorb] = keep
+        if self.rank[keep] == self.rank[absorb]:
+            self.rank[keep] += 1
+
+    def get_groups(self) -> Dict[int, List[int]]:
+        """Returns groups as {root: [elements]}."""
+        groups = defaultdict(list)
+        for member in self.parent:
+            root = self.find(member)
+            groups[root].append(member)
+        return groups
+
+
+class SpatialClusterIndex:
+    """Spatial lookup for clusters backed by an R-tree plus per-axis interval lists."""
+
+    def __init__(self, clusters: List[Cluster]):
+        p = index.Property()
+        p.dimension = 2
+        self.spatial_index = index.Index(properties=p)
+        self.x_intervals = IntervalTree()
+        self.y_intervals = IntervalTree()
+        self.clusters_by_id: Dict[int, Cluster] = {}
+        # Coordinates each cluster was inserted with. A cluster's bbox may be
+        # reassigned after insertion, but the R-tree entry has to be deleted
+        # with the coordinates it was stored under.
+        self._indexed_coords: Dict[int, tuple] = {}
+
+        for cluster in clusters:
+            self.add_cluster(cluster)
+
+    def add_cluster(self, cluster: Cluster):
+        """Register ``cluster`` in the R-tree, both interval lists and the id map."""
+        bbox = cluster.bbox
+        coords = bbox.as_tuple()
+        self.spatial_index.insert(cluster.id, coords)
+        self.x_intervals.insert(bbox.l, bbox.r, cluster.id)
+        self.y_intervals.insert(bbox.t, bbox.b, cluster.id)
+        self.clusters_by_id[cluster.id] = cluster
+        self._indexed_coords[cluster.id] = coords
+
+    def remove_cluster(self, cluster: Cluster):
+        """Remove ``cluster`` from every index it was added to.
+
+        Raises:
+            KeyError: If the cluster id is not registered.
+        """
+        coords = self._indexed_coords.pop(cluster.id, cluster.bbox.as_tuple())
+        self.spatial_index.delete(cluster.id, coords)
+        # Drop the id from the interval lists too, otherwise find_candidates
+        # keeps reporting the removed cluster. Filtering preserves the sort
+        # order the lists rely on.
+        for intervals in (self.x_intervals, self.y_intervals):
+            intervals.intervals = [
+                entry for entry in intervals.intervals if entry[2] != cluster.id
+            ]
+        del self.clusters_by_id[cluster.id]
+
+    def find_candidates(self, bbox: BoundingBox) -> Set[int]:
+        """Find potential overlapping cluster IDs using all indexes.
+
+        The result is the union of the R-tree hits and the interval hits for
+        the box edges, so it may contain ids that do not actually overlap;
+        callers are expected to confirm with ``check_overlap``.
+        """
+        spatial = set(self.spatial_index.intersection(bbox.as_tuple()))
+        x_candidates = self.x_intervals.find_containing(
+            bbox.l
+        ) | self.x_intervals.find_containing(bbox.r)
+        y_candidates = self.y_intervals.find_containing(
+            bbox.t
+        ) | self.y_intervals.find_containing(bbox.b)
+        return spatial | x_candidates | y_candidates
+
+    def check_overlap(
+        self,
+        bbox1: BoundingBox,
+        bbox2: BoundingBox,
+        overlap_threshold: float,
+        containment_threshold: float,
+    ) -> bool:
+        """Check if two bboxes overlap sufficiently.
+
+        Returns True when the intersection-over-union exceeds
+        ``overlap_threshold`` or when the intersection covers more than
+        ``containment_threshold`` of either box. Boxes with non-positive area
+        or without intersection never overlap.
+        """
+        area1, area2 = bbox1.area(), bbox2.area()
+        if area1 <= 0 or area2 <= 0:
+            return False
+
+        overlap_area = bbox1.intersection_area_with(bbox2)
+        if overlap_area <= 0:
+            return False
+
+        iou = overlap_area / (area1 + area2 - overlap_area)
+        containment1 = overlap_area / area1
+        containment2 = overlap_area / area2
+
+        return (
+            iou > overlap_threshold
+            or containment1 > containment_threshold
+            or containment2 > containment_threshold
+        )
+
+
+class IntervalTree:
+    """Sorted list of 1D intervals supporting point-containment queries.
+
+    Despite the name, the intervals are stored in a plain list ordered by their
+    start value.
+    """
+
+    def __init__(self):
+        # (min, max, id) tuples, kept sorted by min
+        self.intervals: List[Tuple[float, float, int]] = []
+
+    def insert(self, min_val: float, max_val: float, id: int):
+        """Insert the interval ``[min_val, max_val]`` under ``id``."""
+        bisect.insort(self.intervals, (min_val, max_val, id), key=lambda x: x[0])
+
+    def find_containing(self, point: float) -> Set[int]:
+        """Find all intervals containing the point (bounds inclusive)."""
+        # Only intervals starting at or before the point can contain it, and
+        # those form a prefix of the sorted list. The whole prefix has to be
+        # checked: the list is ordered by start only, so a long interval that
+        # starts early may still cover the point even if a later-starting,
+        # shorter one does not.
+        end = bisect.bisect_right(self.intervals, point, key=lambda x: x[0])
+        return {
+            interval_id
+            for _, max_val, interval_id in self.intervals[:end]
+            if point <= max_val
+        }
+
+
+class LayoutPostprocessor:
+    """Postprocesses layout predictions by cleaning up clusters and mapping cells.
+
+    Clusters are split into "regular" clusters and "special" ones (pictures and
+    the wrapper types form / key-value region). Regular clusters get cells
+    assigned, their boxes adjusted and overlaps merged; special clusters
+    collect the regular clusters they contain as children.
+    """
+
+    # Cluster type-specific parameters for overlap resolution
+    OVERLAP_PARAMS = {
+        "regular": {"area_threshold": 1.3, "conf_threshold": 0.05},
+        "picture": {"area_threshold": 2.0, "conf_threshold": 0.3},
+        "wrapper": {"area_threshold": 2.0, "conf_threshold": 0.2},
+    }
+
+    WRAPPER_TYPES = {DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION}
+    SPECIAL_TYPES = WRAPPER_TYPES | {DocItemLabel.PICTURE}
+
+    # Minimum confidence a cluster needs, per label, to be kept at all
+    CONFIDENCE_THRESHOLDS = {
+        DocItemLabel.CAPTION: 0.35,
+        DocItemLabel.FOOTNOTE: 0.35,
+        DocItemLabel.FORMULA: 0.35,
+        DocItemLabel.LIST_ITEM: 0.35,
+        DocItemLabel.PAGE_FOOTER: 0.35,
+        DocItemLabel.PAGE_HEADER: 0.35,
+        DocItemLabel.PICTURE: 0.1,
+        DocItemLabel.SECTION_HEADER: 0.45,
+        DocItemLabel.TABLE: 0.35,
+        DocItemLabel.TEXT: 0.45,
+        DocItemLabel.TITLE: 0.45,
+        DocItemLabel.CODE: 0.45,
+        DocItemLabel.CHECKBOX_SELECTED: 0.45,
+        DocItemLabel.CHECKBOX_UNSELECTED: 0.45,
+        DocItemLabel.FORM: 0.45,
+        DocItemLabel.KEY_VALUE_REGION: 0.45,
+        DocItemLabel.DOCUMENT_INDEX: 0.45,
+    }
+
+    # Labels that are rewritten to another label after confidence filtering
+    LABEL_REMAPPING = {
+        DocItemLabel.DOCUMENT_INDEX: DocItemLabel.TABLE,
+        DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
+    }
+
+    def __init__(self, cells: List[Cell], clusters: List[Cluster]):
+        """Initialize processor with cells and clusters."""
+        self.cells = cells
+        self.regular_clusters = [
+            c for c in clusters if c.label not in self.SPECIAL_TYPES
+        ]
+        self.special_clusters = [c for c in clusters if c.label in self.SPECIAL_TYPES]
+
+        # Indices over the clusters as predicted. Overlap removal builds its
+        # own index from the clusters it is given, because boxes are adjusted
+        # and clusters are added after this point.
+        self.regular_index = SpatialClusterIndex(self.regular_clusters)
+        self.picture_index = SpatialClusterIndex(
+            [c for c in self.special_clusters if c.label == DocItemLabel.PICTURE]
+        )
+        self.wrapper_index = SpatialClusterIndex(
+            [c for c in self.special_clusters if c.label in self.WRAPPER_TYPES]
+        )
+
+    def postprocess(self) -> Tuple[List[Cluster], List[Cell]]:
+        """Main processing pipeline.
+
+        Returns:
+            The final clusters in reading order (regular clusters that became
+            children of a special cluster are not repeated at top level) and
+            the cells the processor was created with.
+        """
+        self.regular_clusters = self._process_regular_clusters()
+        self.special_clusters = self._process_special_clusters()
+
+        # Remove regular clusters that are included in wrappers
+        contained_ids = {
+            child.id
+            for wrapper in self.special_clusters
+            if wrapper.label in self.SPECIAL_TYPES
+            for child in wrapper.children
+        }
+        self.regular_clusters = [
+            c for c in self.regular_clusters if c.id not in contained_ids
+        ]
+
+        # Combine and sort final clusters
+        final_clusters = self._sort_clusters(
+            self.regular_clusters + self.special_clusters
+        )
+        return final_clusters, self.cells
+
+    def _process_regular_clusters(self) -> List[Cluster]:
+        """Process regular clusters with iterative refinement."""
+        clusters = [
+            c
+            for c in self.regular_clusters
+            if c.confidence >= self.CONFIDENCE_THRESHOLDS[c.label]
+        ]
+
+        # Apply label remapping
+        for cluster in clusters:
+            if cluster.label in self.LABEL_REMAPPING:
+                cluster.label = self.LABEL_REMAPPING[cluster.label]
+
+        # Initial cell assignment
+        clusters = self._assign_cells_to_clusters(clusters)
+
+        # Handle orphaned cells: each one becomes its own text cluster
+        unassigned = self._find_unassigned_cells(clusters)
+        if unassigned:
+            # New ids must not collide with any existing cluster id, including
+            # the special clusters that postprocess() merges back in later.
+            known_ids = (c.id for c in [*clusters, *self.special_clusters])
+            next_id = max(known_ids, default=0) + 1
+            orphan_clusters = [
+                Cluster(
+                    id=next_id + i,
+                    label=DocItemLabel.TEXT,
+                    bbox=cell.bbox,
+                    confidence=0.0,
+                    cells=[cell],
+                )
+                for i, cell in enumerate(unassigned)
+            ]
+            clusters.extend(orphan_clusters)
+
+        # Iterative refinement: stop early once a pass no longer merges anything
+        prev_count = len(clusters) + 1
+        for _ in range(3):  # Maximum 3 iterations
+            if prev_count == len(clusters):
+                break
+            prev_count = len(clusters)
+            clusters = self._adjust_cluster_bboxes(clusters)
+            clusters = self._remove_overlapping_clusters(clusters, "regular")
+
+        return clusters
+
+    def _process_special_clusters(self) -> List[Cluster]:
+        """Attach contained regular clusters to special clusters and dedupe them.
+
+        A regular cluster counts as contained when more than 80% of its area
+        lies inside the special cluster. Expects ``self.regular_clusters`` to
+        be processed already.
+        """
+        special_clusters = [
+            c
+            for c in self.special_clusters
+            if c.confidence >= self.CONFIDENCE_THRESHOLDS[c.label]
+        ]
+
+        for special in special_clusters:
+            contained = []
+            for cluster in self.regular_clusters:
+                overlap = cluster.bbox.intersection_area_with(special.bbox)
+                if overlap > 0:
+                    containment = overlap / cluster.bbox.area()
+                    if containment > 0.8:
+                        contained.append(cluster)
+
+            if contained:
+                # Sort contained clusters by minimum cell ID
+                contained.sort(
+                    key=lambda cluster: (
+                        min(cell.id for cell in cluster.cells)
+                        if cluster.cells
+                        else sys.maxsize
+                    )
+                )
+                special.children = contained
+
+                # Adjust bbox only for wrapper types
+                if special.label in self.WRAPPER_TYPES:
+                    special.bbox = BoundingBox(
+                        l=min(c.bbox.l for c in contained),
+                        t=min(c.bbox.t for c in contained),
+                        r=max(c.bbox.r for c in contained),
+                        b=max(c.bbox.b for c in contained),
+                    )
+
+        picture_clusters = [
+            c for c in special_clusters if c.label == DocItemLabel.PICTURE
+        ]
+        picture_clusters = self._remove_overlapping_clusters(
+            picture_clusters, "picture"
+        )
+
+        wrapper_clusters = [
+            c for c in special_clusters if c.label in self.WRAPPER_TYPES
+        ]
+        wrapper_clusters = self._remove_overlapping_clusters(
+            wrapper_clusters, "wrapper"
+        )
+
+        return picture_clusters + wrapper_clusters
+
+    def _remove_overlapping_clusters(
+        self,
+        clusters: List[Cluster],
+        cluster_type: str,
+        overlap_threshold: float = 0.8,
+        containment_threshold: float = 0.8,
+    ) -> List[Cluster]:
+        """Merge groups of mutually overlapping clusters into one cluster each.
+
+        Args:
+            clusters: Clusters to deduplicate.
+            cluster_type: Key into ``OVERLAP_PARAMS`` ("regular", "picture" or
+                "wrapper") selecting the area/confidence thresholds.
+            overlap_threshold: IoU above which two clusters are grouped.
+            containment_threshold: Containment ratio above which two clusters
+                are grouped.
+
+        Returns:
+            One cluster per overlap group; the surviving cluster receives the
+            cells of the clusters merged into it.
+        """
+        if not clusters:
+            return []
+
+        params = self.OVERLAP_PARAMS[cluster_type]
+
+        # Index the clusters as they are now. The indices built in __init__
+        # neither know about orphan clusters nor about adjusted bboxes, so
+        # querying them would miss overlapping pairs.
+        spatial_index = SpatialClusterIndex(clusters)
+
+        # Map of currently valid clusters
+        valid_clusters = {c.id: c for c in clusters}
+        uf = UnionFind(valid_clusters.keys())
+
+        for cluster in clusters:
+            candidates = spatial_index.find_candidates(cluster.bbox)
+            candidates &= valid_clusters.keys()  # Only keep existing candidates
+            candidates.discard(cluster.id)
+
+            for other_id in candidates:
+                if spatial_index.check_overlap(
+                    cluster.bbox,
+                    valid_clusters[other_id].bbox,
+                    overlap_threshold,
+                    containment_threshold,
+                ):
+                    uf.union(cluster.id, other_id)
+
+        result = []
+        for group in uf.get_groups().values():
+            if len(group) == 1:
+                result.append(valid_clusters[group[0]])
+                continue
+
+            group_clusters = [valid_clusters[cid] for cid in group]
+            best = self._select_best_cluster(
+                group_clusters, params["area_threshold"], params["conf_threshold"]
+            )
+            for cluster in group_clusters:
+                if cluster is not best:
+                    best.cells.extend(cluster.cells)
+            result.append(best)
+
+        return result
+
+    def _select_best_cluster(
+        self,
+        clusters: List[Cluster],
+        area_threshold: float,
+        conf_threshold: float,
+    ) -> Cluster:
+        """Iteratively select best cluster based on area and confidence thresholds.
+
+        A candidate is ruled out when some other cluster is clearly more
+        confident (by more than ``conf_threshold``) while the candidate is at
+        most ``area_threshold`` times as large. Among the remaining candidates
+        a larger one replaces the current best unless it is clearly less
+        confident. Falls back to the first cluster if every candidate is ruled
+        out.
+        """
+        current_best = None
+        for candidate in clusters:
+            should_select = True
+            for other in clusters:
+                # Identity check: avoids a field-by-field model comparison.
+                if other is candidate:
+                    continue
+
+                area_ratio = candidate.bbox.area() / other.bbox.area()
+                conf_diff = other.confidence - candidate.confidence
+
+                if area_ratio <= area_threshold and conf_diff > conf_threshold:
+                    should_select = False
+                    break
+
+            if should_select:
+                if current_best is None or (
+                    candidate.bbox.area() > current_best.bbox.area()
+                    and current_best.confidence - candidate.confidence <= conf_threshold
+                ):
+                    current_best = candidate
+
+        return current_best if current_best else clusters[0]
+
+    def _assign_cells_to_clusters(
+        self, clusters: List[Cluster], min_overlap: float = 0.2
+    ) -> List[Cluster]:
+        """Assign cells to best overlapping cluster.
+
+        Existing assignments are cleared first. A cell goes to the cluster
+        covering the largest share of the cell's area, provided that share
+        exceeds ``min_overlap``; blank cells and cells without area are not
+        assigned.
+        """
+        for cluster in clusters:
+            cluster.cells = []
+
+        for cell in self.cells:
+            if not cell.text.strip():
+                continue
+
+            cell_area = cell.bbox.area()
+            if cell_area <= 0:
+                continue
+
+            best_overlap = min_overlap
+            best_cluster = None
+
+            for cluster in clusters:
+                overlap = cell.bbox.intersection_area_with(cluster.bbox)
+                overlap_ratio = overlap / cell_area
+
+                if overlap_ratio > best_overlap:
+                    best_overlap = overlap_ratio
+                    best_cluster = cluster
+
+            if best_cluster is not None:
+                best_cluster.cells.append(cell)
+
+        return clusters
+
+    def _find_unassigned_cells(self, clusters: List[Cluster]) -> List[Cell]:
+        """Find non-blank cells not assigned to any cluster."""
+        assigned = {cell.id for cluster in clusters for cell in cluster.cells}
+        return [
+            cell for cell in self.cells if cell.id not in assigned and cell.text.strip()
+        ]
+
+    def _adjust_cluster_bboxes(self, clusters: List[Cluster]) -> List[Cluster]:
+        """Adjust cluster bounding boxes to contain their cells.
+
+        Clusters without cells keep their box. Tables keep their own box
+        extended by the cells' box; every other cluster takes the cells' box.
+        """
+        for cluster in clusters:
+            if not cluster.cells:
+                continue
+
+            cells_bbox = BoundingBox(
+                l=min(cell.bbox.l for cell in cluster.cells),
+                t=min(cell.bbox.t for cell in cluster.cells),
+                r=max(cell.bbox.r for cell in cluster.cells),
+                b=max(cell.bbox.b for cell in cluster.cells),
+            )
+
+            if cluster.label == DocItemLabel.TABLE:
+                # For tables, take union of current bbox and cells bbox
+                cluster.bbox = BoundingBox(
+                    l=min(cluster.bbox.l, cells_bbox.l),
+                    t=min(cluster.bbox.t, cells_bbox.t),
+                    r=max(cluster.bbox.r, cells_bbox.r),
+                    b=max(cluster.bbox.b, cells_bbox.b),
+                )
+            else:
+                cluster.bbox = cells_bbox
+
+        return clusters
+
+    def _sort_clusters(self, clusters: List[Cluster]) -> List[Cluster]:
+        """Sort clusters in reading order (top-to-bottom, left-to-right).
+
+        Non-picture clusters with cells are keyed by their top-most (then
+        left-most) cell; all others by their own box.
+        """
+
+        def reading_order_key(cluster: Cluster) -> Tuple[float, float]:
+            if cluster.cells and cluster.label != DocItemLabel.PICTURE:
+                first_cell = min(cluster.cells, key=lambda c: (c.bbox.t, c.bbox.l))
+                return (first_cell.bbox.t, first_cell.bbox.l)
+            return (cluster.bbox.t, cluster.bbox.l)
+
+        return sorted(clusters, key=reading_order_key)
--- a/docling/utils/layout_utils.py
+++ b/docling/utils/layout_utils.py
@ -1,812 +0,0 @@
-import copy
-import logging
-
-import networkx as nx
-from docling_core.types.doc import DocItemLabel
-
-logger = logging.getLogger("layout_utils")
-
-
-## -------------------------------
-## Geometric helper functions
-## The coordinates grow left to right, and bottom to top.
-## The bounding box list elements 0 to 3 are x_left, y_bottom, x_right, y_top.
-
-
-def area(bbox):
-    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
-
-
-def contains(bbox_i, bbox_j):
-    ## Returns True if bbox_i contains bbox_j, else False
-    return (
-        bbox_i[0] <= bbox_j[0]
-        and bbox_i[1] <= bbox_j[1]
-        and bbox_i[2] >= bbox_j[2]
-        and bbox_i[3] >= bbox_j[3]
-    )
-
-
-def is_intersecting(bbox_i, bbox_j):
-    return not (
-        bbox_i[2] < bbox_j[0]
-        or bbox_i[0] > bbox_j[2]
-        or bbox_i[3] < bbox_j[1]
-        or bbox_i[1] > bbox_j[3]
-    )
-
-
-def bb_iou(boxA, boxB):
-    # determine the (x, y)-coordinates of the intersection rectangle
-    xA = max(boxA[0], boxB[0])
-    yA = max(boxA[1], boxB[1])
-    xB = min(boxA[2], boxB[2])
-    yB = min(boxA[3], boxB[3])
-    # compute the area of intersection rectangle
-    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
-    # compute the area of both the prediction and ground-truth
-    # rectangles
-    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
-    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
-    # compute the intersection over union by taking the intersection
-    # area and dividing it by the sum of prediction + ground-truth
-    # areas - the interesection area
-    iou = interArea / float(boxAArea + boxBArea - interArea)
-    # return the intersection over union value
-    return iou
-
-
-def compute_intersection(bbox_i, bbox_j):
-    ## Returns the size of the intersection area of the two boxes
-    if not is_intersecting(bbox_i, bbox_j):
-        return 0
-    ## Determine the (x, y)-coordinates of the intersection rectangle:
-    xA = max(bbox_i[0], bbox_j[0])
-    yA = max(bbox_i[1], bbox_j[1])
-    xB = min(bbox_i[2], bbox_j[2])
-    yB = min(bbox_i[3], bbox_j[3])
-    ## Compute the area of intersection rectangle:
-    interArea = (xB - xA) * (yB - yA)
-    if interArea < 0:
-        logger.debug("Warning: Negative intersection detected!")
-        return 0
-    return interArea
-
-
-def surrounding(bbox_i, bbox_j):
-    ## Computes minimal box that contains both input boxes
-    sbox = []
-    sbox.append(min(bbox_i[0], bbox_j[0]))
-    sbox.append(min(bbox_i[1], bbox_j[1]))
-    sbox.append(max(bbox_i[2], bbox_j[2]))
-    sbox.append(max(bbox_i[3], bbox_j[3]))
-    return sbox
-
-
-def surrounding_list(bbox_list):
-    ## Computes minimal box that contains all boxes in the input list
-    ## The list should be non-empty, but just in case it's not:
-    if len(bbox_list) == 0:
-        sbox = [0, 0, 0, 0]
-    else:
-        sbox = []
-        sbox.append(min([bbox[0] for bbox in bbox_list]))
-        sbox.append(min([bbox[1] for bbox in bbox_list]))
-        sbox.append(max([bbox[2] for bbox in bbox_list]))
-        sbox.append(max([bbox[3] for bbox in bbox_list]))
-    return sbox
-
-
-def vertical_overlap(bboxA, bboxB):
-    ## bbox[1] is the lower bound, bbox[3] the upper bound (larger number)
-    if bboxB[3] < bboxA[1]:  ## B below A
-        return False
-    elif bboxA[3] < bboxB[1]:  ## A below B
-        return False
-    else:
-        return True
-
-
-def vertical_overlap_fraction(bboxA, bboxB):
-    ## Returns the vertical overlap as fraction of the lower bbox height.
-    ## bbox[1] is the lower bound, bbox[3] the upper bound (larger number)
-    ## Height 0 is permitted in the input.
-    heightA = bboxA[3] - bboxA[1]
-    heightB = bboxB[3] - bboxB[1]
-    min_height = min(heightA, heightB)
-    if bboxA[3] >= bboxB[3]:  ## A starts higher or equal
-        if (
-            bboxA[1] <= bboxB[1]
-        ):  ## B is completely in A; this can include height of B = 0:
-            fraction = 1
-        else:
-            overlap = max(bboxB[3] - bboxA[1], 0)
-            fraction = overlap / max(min_height, 0.001)
-    else:
-        if (
-            bboxB[1] <= bboxA[1]
-        ):  ## A is completely in B; this can include height of A = 0:
-            fraction = 1
-        else:
-            overlap = max(bboxA[3] - bboxB[1], 0)
-            fraction = overlap / max(min_height, 0.001)
-    return fraction
-
-
-## -------------------------------
-## Cluster-and-cell relations
-
-
-def compute_enclosed_cells(
-    cluster_bbox, raw_cells, min_cell_intersection_with_cluster=0.2
-):
-    cells_in_cluster = []
-    cells_in_cluster_int = []
-    for ix, cell in enumerate(raw_cells):
-        cell_bbox = cell["bbox"]
-        intersection = compute_intersection(cell_bbox, cluster_bbox)
-        frac_area = area(cell_bbox) * min_cell_intersection_with_cluster
-
-        if (
-            intersection > frac_area and frac_area > 0
-        ):  # intersect > certain fraction of cell
-            cells_in_cluster.append(ix)
-            cells_in_cluster_int.append(intersection)
-        elif contains(
-            cluster_bbox,
-            [cell_bbox[0] + 3, cell_bbox[1] + 3, cell_bbox[2] - 3, cell_bbox[3] - 3],
-        ):
-            cells_in_cluster.append(ix)
-    return cells_in_cluster, cells_in_cluster_int
-
-
-def find_clusters_around_cells(cell_count, clusters):
-    ## Per raw cell, find to which clusters it belongs.
-    ## Return list of these indices in the raw-cell order.
-    clusters_around_cells = [[] for _ in range(cell_count)]
-    for cl_ix, cluster in enumerate(clusters):
-        for ix in cluster["cell_ids"]:
-            clusters_around_cells[ix].append(cl_ix)
-    return clusters_around_cells
-
-
-def find_cell_index(raw_ix, cell_array):
-    ## "raw_ix" is a rawcell_id.
-    ## "cell_array" has the structure of an (annotation) cells array.
-    ## Returns index of cell in cell_array that has this rawcell_id.
-    for ix, cell in enumerate(cell_array):
-        if cell["rawcell_id"] == raw_ix:
-            return ix
-
-
-def find_cell_indices(cluster, cell_array):
-    ## "cluster" must have the structure as in a clusters array in a prediction,
-    ## "cell_array" that of a cells array.
-    ## Returns list of indices of cells in cell_array that have the rawcell_ids as in the cluster,
-    ## in the order of the rawcell_ids.
-    result = []
-    for raw_ix in sorted(cluster["cell_ids"]):
-        ## Find the cell with this rawcell_id (if any)
-        for ix, cell in enumerate(cell_array):
-            if cell["rawcell_id"] == raw_ix:
-                result.append(ix)
-    return result
-
-
-def find_first_cell_index(cluster, cell_array):
-    ## "cluster" must be a dict with key "cell_ids"; it can also be a line.
-    ## "cell_array" has the structure of a cells array in an annotation.
-    ## Returns index of cell in cell_array that has the lowest rawcell_id from the cluster.
-    result = []  ## We keep it a list as it can be empty (picture without text cells)
-    if len(cluster["cell_ids"]) == 0:
-        return result
-    raw_ix = min(cluster["cell_ids"])
-    ## Find the cell with this rawcell_id (if any)
-    for ix, cell in enumerate(cell_array):
-        if cell["rawcell_id"] == raw_ix:
-            result.append(ix)
-            break  ## One is enough; should be only one anyway.
-    if result == []:
-        logger.debug(
-            "  Warning: Raw cell " + str(raw_ix) + " not found in annotation cells"
-        )
-    return result
-
-
-## -------------------------------
-## Cluster labels and text
-
-
-def relabel_cluster(cluster, cl_ix, new_label, target_pred):
-    ## "cluster" must have the structure as in a clusters array in a prediction,
-    ## "cl_ix" is its index in target_pred,
-    ## "new_label" is the intended new label,
-    ## "target_pred" is the entire current target prediction.
-    ## Sets label on the cluster itself, and on the cells in the target_pred.
-    ## Returns new_label so that also the cl_label variable in the main code is easily set.
-    target_pred["clusters"][cl_ix]["type"] = new_label
-    cluster_target_cells = find_cell_indices(cluster, target_pred["cells"])
-    for ix in cluster_target_cells:
-        target_pred["cells"][ix]["label"] = new_label
-    return new_label
-
-
-def find_cluster_text(cluster, raw_cells):
-    ## "cluster" must be a dict with "cell_ids"; it can also be a line.
-    ## "raw_cells" must have the format of item["raw"]["cells"]
-    ## Returns the text of the cluster, with blanks between the cell contents
-    ## (which seem to be words or phrases without starting or trailing blanks).
-    ## Note that in formulas, this may give a lot more blanks than originally
-    cluster_text = ""
-    for raw_ix in sorted(cluster["cell_ids"]):
-        cluster_text = cluster_text + raw_cells[raw_ix]["text"] + " "
-    return cluster_text.rstrip()
-
-
-def find_cluster_text_without_blanks(cluster, raw_cells):
-    ## "cluster" must be a dict with "cell_ids"; it can also be a line.
-    ## "raw_cells" must have the format of item["raw"]["cells"]
-    ## Returns the text of the cluster, without blanks between the cell contents
-    ## Interesting in formula analysis.
-    cluster_text = ""
-    for raw_ix in sorted(cluster["cell_ids"]):
-        cluster_text = cluster_text + raw_cells[raw_ix]["text"]
-    return cluster_text.rstrip()
-
-
-## -------------------------------
-## Clusters and lines
-## (Most line-oriented functions are only needed in TextAnalysisGivenClusters,
-##  but this one also in FormulaAnalysis)
-
-
-def build_cluster_from_lines(lines, label, id):
-    ## Lines must be a non-empty list of dicts (lines) with elements "cell_ids" and "bbox"
-    ## (There is no condition that they are really geometrically lines)
-    ## A cluster in standard format is returned with given label and id
-    local_lines = copy.deepcopy(
-        lines
-    )  ## without this, it changes "lines" also outside this function
-    first_line = local_lines.pop(0)
-    cluster = {
-        "id": id,
-        "type": label,
-        "cell_ids": first_line["cell_ids"],
-        "bbox": first_line["bbox"],
-        "confidence": 0,
-        "created_by": "merged_cells",
-    }
-    confidence = 0
-    counter = 0
-    for line in local_lines:
-        new_cell_ids = cluster["cell_ids"] + line["cell_ids"]
-        cluster["cell_ids"] = new_cell_ids
-        cluster["bbox"] = surrounding(cluster["bbox"], line["bbox"])
-        counter += 1
-        confidence += line["confidence"]
-    confidence = confidence / counter
-    cluster["confidence"] = confidence
-    return cluster
-
-
-## -------------------------------
-## Reading order
-
-
-def produce_reading_order(clusters, cluster_sort_type, cell_sort_type, sort_ids):
-    ## In:
-    ##   Clusters: list as in predictions.
-    ##   cluster_sort_type: string, currently only "raw_cells".
-    ##   cell_sort_type: string, currently only "raw_cells".
-    ##   sort_ids: Boolean, whether the cluster ids should be adapted to their new position
-    ## Out: Another clusters list, sorted according to the type.
-
-    logger.debug("---- Start cluster sorting ------")
-
-    if cell_sort_type == "raw_cell_ids":
-        for cl in clusters:
-            sorted_cell_ids = sorted(cl["cell_ids"])
-            cl["cell_ids"] = sorted_cell_ids
-    else:
-        logger.debug(
-            "Unknown cell_sort_type `"
-            + cell_sort_type
-            + "`, no cell sorting will happen."
-        )
-
-    if cluster_sort_type == "raw_cell_ids":
-        clusters_with_cells = [cl for cl in clusters if cl["cell_ids"] != []]
-        clusters_without_cells = [cl for cl in clusters if cl["cell_ids"] == []]
-        logger.debug(
-            "Clusters with cells: " + str([cl["id"] for cl in clusters_with_cells])
-        )
-        logger.debug(
-            "  Their first cell ids: "
-            + str([cl["cell_ids"][0] for cl in clusters_with_cells])
-        )
-        logger.debug(
-            "Clusters without cells: "
-            + str([cl["id"] for cl in clusters_without_cells])
-        )
-        clusters_with_cells_sorted = sorted(
-            clusters_with_cells, key=lambda cluster: cluster["cell_ids"][0]
-        )
-        logger.debug(
-            "  First cell ids after sorting: "
-            + str([cl["cell_ids"][0] for cl in clusters_with_cells_sorted])
-        )
-        sorted_clusters = clusters_with_cells_sorted + clusters_without_cells
-    else:
-        logger.debug(
-            "Unknown cluster_sort_type: `"
-            + cluster_sort_type
-            + "`, no cluster sorting will happen."
-        )
-
-    if sort_ids:
-        for i, cl in enumerate(sorted_clusters):
-            cl["id"] = i
-    return sorted_clusters
-
-
-## -------------------------------
-## Line Splitting
-
-
-def sort_cells_horizontal(line_cell_ids, raw_cells):
-    ## "line_cells" should be a non-empty list of (raw) cell_ids
-    ## "raw_cells" has the structure of item["raw"]["cells"].
-    ## Sorts the cells in the line by x0 (left start).
-    new_line_cell_ids = sorted(
-        line_cell_ids, key=lambda cell_id: raw_cells[cell_id]["bbox"][0]
-    )
-    return new_line_cell_ids
-
-
-def adapt_bboxes(raw_cells, clusters, orphan_cell_indices):
-    new_clusters = []
-    for ix, cluster in enumerate(clusters):
-        new_cluster = copy.deepcopy(cluster)
-        logger.debug(
-            "Treating cluster " + str(ix) + ", type " + str(new_cluster["type"])
-        )
-        logger.debug("  with cells: " + str(new_cluster["cell_ids"]))
-        if len(cluster["cell_ids"]) == 0 and cluster["type"] != DocItemLabel.PICTURE:
-            logger.debug("  Empty non-picture, removed")
-            continue  ## Skip this former cluster, now without cells.
-        new_bbox = adapt_bbox(raw_cells, new_cluster, orphan_cell_indices)
-        new_cluster["bbox"] = new_bbox
-        new_clusters.append(new_cluster)
-    return new_clusters
-
-
-def adapt_bbox(raw_cells, cluster, orphan_cell_indices):
-    if not (cluster["type"] in [DocItemLabel.TABLE, DocItemLabel.PICTURE]):
-        ## A text-like cluster. The bbox only needs to be around the text cells:
-        logger.debug("    Initial bbox: " + str(cluster["bbox"]))
-        new_bbox = surrounding_list(
-            [raw_cells[cid]["bbox"] for cid in cluster["cell_ids"]]
-        )
-        logger.debug("  New bounding box:" + str(new_bbox))
-    if cluster["type"] == DocItemLabel.PICTURE:
-        ## We only make the bbox completely comprise included text cells:
-        logger.debug("  Picture")
-        if len(cluster["cell_ids"]) != 0:
-            min_bbox = surrounding_list(
-                [raw_cells[cid]["bbox"] for cid in cluster["cell_ids"]]
-            )
-            logger.debug("    Minimum bbox: " + str(min_bbox))
-            logger.debug("    Initial bbox: " + str(cluster["bbox"]))
-            new_bbox = surrounding(min_bbox, cluster["bbox"])
-            logger.debug("    New bbox (initial and text cells): " + str(new_bbox))
-        else:
-            logger.debug("    without text cells, no change.")
-            new_bbox = cluster["bbox"]
-    else:  ## A table
-        ## At least we have to keep the included text cells, and we make the bbox completely comprise them
-        min_bbox = surrounding_list(
-            [raw_cells[cid]["bbox"] for cid in cluster["cell_ids"]]
-        )
-        logger.debug("    Minimum bbox: " + str(min_bbox))
-        logger.debug("    Initial bbox: " + str(cluster["bbox"]))
-        new_bbox = surrounding(min_bbox, cluster["bbox"])
-        logger.debug("    Possibly increased bbox: " + str(new_bbox))
-
-        ## Now we look which non-belonging cells are covered.
-        ## (To decrease dependencies, we don't make use of which cells we actually removed.)
-        ## We don't worry about orphan cells, those could still be added to the table.
-        enclosed_cells = compute_enclosed_cells(
-            new_bbox, raw_cells, min_cell_intersection_with_cluster=0.3
-        )[0]
-        additional_cells = set(enclosed_cells) - set(cluster["cell_ids"])
-        logger.debug(
-            "    Additional cells enclosed by Table bbox: " + str(additional_cells)
-        )
-        spurious_cells = additional_cells - set(orphan_cell_indices)
-        logger.debug(
-            "    Spurious cells enclosed by Table bbox (additional minus orphans): "
-            + str(spurious_cells)
-        )
-        if len(spurious_cells) == 0:
-            return new_bbox
-
-        ## Else we want to keep as much as possible, e.g., grid lines, but not the spurious cells if we can.
-        ## We initialize possible cuts with the current bbox.
-        left_cut = new_bbox[0]
-        right_cut = new_bbox[2]
-        upper_cut = new_bbox[3]
-        lower_cut = new_bbox[1]
-
-        for cell_ix in spurious_cells:
-            cell = raw_cells[cell_ix]
-            # logger.debug("     Spurious cell bbox: " + str(cell["bbox"]))
-            is_left = cell["bbox"][2] < min_bbox[0]
-            is_right = cell["bbox"][0] > min_bbox[2]
-            is_above = cell["bbox"][1] > min_bbox[3]
-            is_below = cell["bbox"][3] < min_bbox[1]
-            # logger.debug("      Left, right, above, below? " + str([is_left, is_right, is_above, is_below]))
-
-            if is_left:
-                if cell["bbox"][2] > left_cut:
-                    ## We move the left cut to exclude this cell:
-                    left_cut = cell["bbox"][2]
-            if is_right:
-                if cell["bbox"][0] < right_cut:
-                    ## We move the right cut to exclude this cell:
-                    right_cut = cell["bbox"][0]
-            if is_above:
-                if cell["bbox"][1] < upper_cut:
-                    ## We move the upper cut to exclude this cell:
-                    upper_cut = cell["bbox"][1]
-            if is_below:
-                if cell["bbox"][3] > lower_cut:
-                    ## We move the left cut to exclude this cell:
-                    lower_cut = cell["bbox"][3]
-            # logger.debug("      Current bbox: " + str([left_cut, lower_cut, right_cut, upper_cut]))
-
-            new_bbox = [left_cut, lower_cut, right_cut, upper_cut]
-
-        logger.debug("   Final bbox: " + str(new_bbox))
-    return new_bbox
-
-
-def remove_cluster_duplicates_by_conf(cluster_predictions, threshold=0.5):
-    DuplicateDeletedClusterIDs = []
-    for cluster_1 in cluster_predictions:
-        for cluster_2 in cluster_predictions:
-            if cluster_1["id"] != cluster_2["id"]:
-                if_conf = False
-                if cluster_1["confidence"] > cluster_2["confidence"]:
-                    if_conf = True
-                if if_conf == True:
-                    if bb_iou(cluster_1["bbox"], cluster_2["bbox"]) > threshold:
-                        DuplicateDeletedClusterIDs.append(cluster_2["id"])
-                    elif contains(
-                        cluster_1["bbox"],
-                        [
-                            cluster_2["bbox"][0] + 3,
-                            cluster_2["bbox"][1] + 3,
-                            cluster_2["bbox"][2] - 3,
-                            cluster_2["bbox"][3] - 3,
-                        ],
-                    ):
-                        DuplicateDeletedClusterIDs.append(cluster_2["id"])
-
-    DuplicateDeletedClusterIDs = list(set(DuplicateDeletedClusterIDs))
-
-    for cl_id in DuplicateDeletedClusterIDs:
-        for cluster in cluster_predictions:
-            if cl_id == cluster["id"]:
-                cluster_predictions.remove(cluster)
-    return cluster_predictions
-
-
-# Assign orphan cells by a low confidence prediction that is below the assigned confidence
-def assign_orphans_with_low_conf_pred(
-    cluster_predictions, cluster_predictions_low, raw_cells, orphan_cell_indices
-):
-    for orph_id in orphan_cell_indices:
-        cluster_chosen = {}
-        iou_thresh = 0.05
-        confidence = 0.05
-
-        # Loop over all predictions, and find the one with the highest IOU, and confidence
-        for cluster in cluster_predictions_low:
-            calc_iou = bb_iou(cluster["bbox"], raw_cells[orph_id]["bbox"])
-            cluster_area = (cluster["bbox"][3] - cluster["bbox"][1]) * (
-                cluster["bbox"][2] - cluster["bbox"][0]
-            )
-            cell_area = (
-                raw_cells[orph_id]["bbox"][3] - raw_cells[orph_id]["bbox"][1]
-            ) * (raw_cells[orph_id]["bbox"][2] - raw_cells[orph_id]["bbox"][0])
-
-            if (
-                (iou_thresh < calc_iou)
-                and (cluster["confidence"] > confidence)
-                and (cell_area * 3 > cluster_area)
-            ):
-                cluster_chosen = cluster
-                iou_thresh = calc_iou
-                confidence = cluster["confidence"]
-        # If a candidate is found, assign to it the PDF cell ids, and tag that it was created by this function for tracking
-        if iou_thresh != 0.05 and confidence != 0.05:
-            cluster_chosen["cell_ids"].append(orph_id)
-            cluster_chosen["created_by"] = "orph_low_conf"
-            cluster_predictions.append(cluster_chosen)
-            orphan_cell_indices.remove(orph_id)
-    return cluster_predictions, orphan_cell_indices
-
-
-def remove_ambigous_pdf_cell_by_conf(cluster_predictions, raw_cells, amb_cell_idxs):
-    for amb_cell_id in amb_cell_idxs:
-        highest_conf = 0
-        highest_bbox_iou = 0
-        cluster_chosen = None
-        problamatic_clusters = []
-
-        # Find clusters in question
-        for cluster in cluster_predictions:
-
-            if amb_cell_id in cluster["cell_ids"]:
-                problamatic_clusters.append(amb_cell_id)
-
-                # If the cell_id is in a cluster of high conf, and highest iou score, and smaller in area
-                bbox_iou_val = bb_iou(cluster["bbox"], raw_cells[amb_cell_id]["bbox"])
-
-                if (
-                    cluster["confidence"] > highest_conf
-                    and bbox_iou_val > highest_bbox_iou
-                ):
-                    cluster_chosen = cluster
-                    highest_conf = cluster["confidence"]
-                    highest_bbox_iou = bbox_iou_val
-                    if cluster["id"] in problamatic_clusters:
-                        problamatic_clusters.remove(cluster["id"])
-
-        # now remove the assigning of cell id from lower confidence, and threshold
-        for cluster in cluster_predictions:
-            for prob_amb_id in problamatic_clusters:
-                if prob_amb_id in cluster["cell_ids"]:
-                    cluster["cell_ids"].remove(prob_amb_id)
-        amb_cell_idxs.remove(amb_cell_id)
-
-    return cluster_predictions, amb_cell_idxs
-
-
-def ranges(nums):
-    # Find if consecutive numbers exist within pdf cells
-    # Used to remove line numbers for review manuscripts
-    nums = sorted(set(nums))
-    gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s + 1 < e]
-    edges = iter(nums[:1] + sum(gaps, []) + nums[-1:])
-    return list(zip(edges, edges))
-
-
-def set_orphan_as_text(
-    cluster_predictions, cluster_predictions_low, raw_cells, orphan_cell_indices
-):
-    max_id = -1
-    figures = []
-    for cluster in cluster_predictions:
-        if cluster["type"] == DocItemLabel.PICTURE:
-            figures.append(cluster)
-
-        if cluster["id"] > max_id:
-            max_id = cluster["id"]
-    max_id += 1
-
-    lines_detector = False
-    content_of_orphans = []
-    for orph_id in orphan_cell_indices:
-        orph_cell = raw_cells[orph_id]
-        content_of_orphans.append(raw_cells[orph_id]["text"])
-
-    fil_content_of_orphans = []
-    for cell_content in content_of_orphans:
-        if cell_content.isnumeric():
-            try:
-                num = int(cell_content)
-                fil_content_of_orphans.append(num)
-            except ValueError:  # ignore the cell
-                pass
-
-    # line_orphans = []
-    #  Check if there are more than 2 pdf orphan cells, if there are more than 2,
-    #  then check between the orphan cells if they are numeric
-    # and if they are a consecutive series of numbers (using ranges function) to decide
-
-    if len(fil_content_of_orphans) > 2:
-        out_ranges = ranges(fil_content_of_orphans)
-        if len(out_ranges) > 1:
-            cnt_range = 0
-            for ranges_ in out_ranges:
-                if ranges_[0] != ranges_[1]:
-                    # If there are more than 75 (half the total line number of a review manuscript page)
-                    # decide that there are line numbers on page to be ignored.
-                    if len(list(range(ranges_[0], ranges_[1]))) > 75:
-                        lines_detector = True
-                        # line_orphans = line_orphans + list(range(ranges_[0], ranges_[1]))
-
-    for orph_id in orphan_cell_indices:
-        orph_cell = raw_cells[orph_id]
-        if bool(orph_cell["text"] and not orph_cell["text"].isspace()):
-            fig_flag = False
-            # Do not assign orphan cells if they are inside a figure
-            for fig in figures:
-                if contains(fig["bbox"], orph_cell["bbox"]):
-                    fig_flag = True
-
-            # if fig_flag == False and raw_cells[orph_id]["text"] not in line_orphans:
-            if fig_flag == False and lines_detector == False:
-                # get class from low confidence detections if not set as text:
-                class_type = DocItemLabel.TEXT
-
-                for cluster in cluster_predictions_low:
-                    intersection = compute_intersection(
-                        orph_cell["bbox"], cluster["bbox"]
-                    )
-                    class_type = DocItemLabel.TEXT
-                    if (
-                        cluster["confidence"] > 0.1
-                        and bb_iou(cluster["bbox"], orph_cell["bbox"]) > 0.4
-                    ):
-                        class_type = cluster["type"]
-                    elif contains(
-                        cluster["bbox"],
-                        [
-                            orph_cell["bbox"][0] + 3,
-                            orph_cell["bbox"][1] + 3,
-                            orph_cell["bbox"][2] - 3,
-                            orph_cell["bbox"][3] - 3,
-                        ],
-                    ):
-                        class_type = cluster["type"]
-                    elif intersection > area(orph_cell["bbox"]) * 0.2:
-                        class_type = cluster["type"]
-
-                new_cluster = {
-                    "id": max_id,
-                    "bbox": orph_cell["bbox"],
-                    "type": class_type,
-                    "cell_ids": [orph_id],
-                    "confidence": -1,
-                    "created_by": "orphan_default",
-                }
-                max_id += 1
-                cluster_predictions.append(new_cluster)
-    return cluster_predictions, orphan_cell_indices
-
-
-def merge_cells(cluster_predictions):
-    # Using graph component creates clusters if orphan cells are touching or too close.
-    G = nx.Graph()
-    for cluster in cluster_predictions:
-        if cluster["created_by"] == "orphan_default":
-            G.add_node(cluster["id"])
-
-    for cluster_1 in cluster_predictions:
-        for cluster_2 in cluster_predictions:
-            if (
-                cluster_1["id"] != cluster_2["id"]
-                and cluster_2["created_by"] == "orphan_default"
-                and cluster_1["created_by"] == "orphan_default"
-            ):
-                cl1 = copy.deepcopy(cluster_1["bbox"])
-                cl2 = copy.deepcopy(cluster_2["bbox"])
-                cl1[0] = cl1[0] - 2
-                cl1[1] = cl1[1] - 2
-                cl1[2] = cl1[2] + 2
-                cl1[3] = cl1[3] + 2
-                cl2[0] = cl2[0] - 2
-                cl2[1] = cl2[1] - 2
-                cl2[2] = cl2[2] + 2
-                cl2[3] = cl2[3] + 2
-                if is_intersecting(cl1, cl2):
-                    G.add_edge(cluster_1["id"], cluster_2["id"])
-
-    component = sorted(map(sorted, nx.k_edge_components(G, k=1)))
-    max_id = -1
-    for cluster_1 in cluster_predictions:
-        if cluster_1["id"] > max_id:
-            max_id = cluster_1["id"]
-
-    for nodes in component:
-        if len(nodes) > 1:
-            max_id += 1
-            lines = []
-            for node in nodes:
-                for cluster in cluster_predictions:
-                    if cluster["id"] == node:
-                        lines.append(cluster)
-                        cluster_predictions.remove(cluster)
-            new_merged_cluster = build_cluster_from_lines(
-                lines, DocItemLabel.TEXT, max_id
-            )
-            cluster_predictions.append(new_merged_cluster)
-    return cluster_predictions
-
-
-def clean_up_clusters(
-    cluster_predictions,
-    raw_cells,
-    merge_cells=False,
-    img_table=False,
-    one_cell_table=False,
-):
-    DuplicateDeletedClusterIDs = []
-
-    for cluster_1 in cluster_predictions:
-        for cluster_2 in cluster_predictions:
-            if cluster_1["id"] != cluster_2["id"]:
-                # remove any artifcats created by merging clusters
-                if merge_cells == True:
-                    if contains(
-                        cluster_1["bbox"],
-                        [
-                            cluster_2["bbox"][0] + 3,
-                            cluster_2["bbox"][1] + 3,
-                            cluster_2["bbox"][2] - 3,
-                            cluster_2["bbox"][3] - 3,
-                        ],
-                    ):
-                        cluster_1["cell_ids"] = (
-                            cluster_1["cell_ids"] + cluster_2["cell_ids"]
-                        )
-                        DuplicateDeletedClusterIDs.append(cluster_2["id"])
-                # remove clusters that might appear inside tables, or images (such as pdf cells in graphs)
-                elif img_table == True:
-                    if (
-                        cluster_1["type"] == DocItemLabel.TEXT
-                        and cluster_2["type"] == DocItemLabel.PICTURE
-                        or cluster_2["type"] == DocItemLabel.TABLE
-                    ):
-                        if bb_iou(cluster_1["bbox"], cluster_2["bbox"]) > 0.5:
-                            DuplicateDeletedClusterIDs.append(cluster_1["id"])
-                        elif contains(
-                            [
-                                cluster_2["bbox"][0] - 3,
-                                cluster_2["bbox"][1] - 3,
-                                cluster_2["bbox"][2] + 3,
-                                cluster_2["bbox"][3] + 3,
-                            ],
-                            cluster_1["bbox"],
-                        ):
-                            DuplicateDeletedClusterIDs.append(cluster_1["id"])
-            # remove tables that have one pdf cell
-            if one_cell_table == True:
-                if (
-                    cluster_1["type"] == DocItemLabel.TABLE
-                    and len(cluster_1["cell_ids"]) < 2
-                ):
-                    DuplicateDeletedClusterIDs.append(cluster_1["id"])
-
-    DuplicateDeletedClusterIDs = list(set(DuplicateDeletedClusterIDs))
-
-    for cl_id in DuplicateDeletedClusterIDs:
-        for cluster in cluster_predictions:
-            if cl_id == cluster["id"]:
-                cluster_predictions.remove(cluster)
-    return cluster_predictions
-
-
-def assigning_cell_ids_to_clusters(clusters, raw_cells, threshold):
-    for cluster in clusters:
-        cells_in_cluster, _ = compute_enclosed_cells(
-            cluster["bbox"], raw_cells, min_cell_intersection_with_cluster=threshold
-        )
-        cluster["cell_ids"] = cells_in_cluster
-        ## These cell_ids are ids of the raw cells.
-        ## They are often, but not always, the same as the "id" or the index of the "cells" list in a prediction.
-    return clusters
-
-
-# Creates a map of cell_id->cluster_id
-def cell_id_state_map(clusters, cell_count):
-    clusters_around_cells = find_clusters_around_cells(cell_count, clusters)
-    orphan_cell_indices = [
-        ix for ix in range(cell_count) if len(clusters_around_cells[ix]) == 0
-    ]  # which cells are assigned no cluster?
-    ambiguous_cell_indices = [
-        ix for ix in range(cell_count) if len(clusters_around_cells[ix]) > 1
-    ]  # which cells are assigned > 1 clusters?
-    return clusters_around_cells, orphan_cell_indices, ambiguous_cell_indices
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@ -74,6 +74,10 @@ def main():
    pipeline_options.do_ocr = True
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
+    pipeline_options.ocr_options.lang = "es"
+    pipeline_options.accelerator_options = AcceleratorOptions(
+        num_threads=4, device=Device.AUTO
+    )

    doc_converter = DocumentConverter(
        format_options={
--- a/poetry.lock
+++ b/poetry.lock
@ -1,5 +1,36 @@
 # This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.

+[[package]]
+name = "accelerate"
+version = "1.1.1"
+description = "Accelerate"
+optional = false
+python-versions = ">=3.9.0"
+files = [
+    {file = "accelerate-1.1.1-py3-none-any.whl", hash = "sha256:61edd81762131b8d4bede008643fa1e1f3bf59bec710ebda9771443e24feae02"},
+    {file = "accelerate-1.1.1.tar.gz", hash = "sha256:0d39dfac557052bc735eb2703a0e87742879e1e40b88af8a2f9a93233d4cd7db"},
+]
+
+[package.dependencies]
+huggingface-hub = ">=0.21.0"
+numpy = ">=1.17,<3.0.0"
+packaging = ">=20.0"
+psutil = "*"
+pyyaml = "*"
+safetensors = ">=0.4.3"
+torch = ">=1.10.0"
+
+[package.extras]
+deepspeed = ["deepspeed"]
+dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "diffusers", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.6.4,<0.7.0)", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"]
+quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.6.4,<0.7.0)"]
+rich = ["rich"]
+sagemaker = ["sagemaker"]
+test-dev = ["bitsandbytes", "datasets", "diffusers", "evaluate", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"]
+test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist"]
+test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"]
+testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchdata (>=0.8.0)", "torchpippy (>=0.2.0)", "tqdm", "transformers"]
+
 [[package]]
 name = "aiohappyeyeballs"
 version = "2.4.4"
@ -231,6 +262,21 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]

+[[package]]
+name = "autoflake"
+version = "2.3.1"
+description = "Removes unused imports and unused variables"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"},
+    {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"},
+]
+
+[package.dependencies]
+pyflakes = ">=3.0.0"
+tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""}
+
 [[package]]
 name = "autopep8"
 version = "2.2.0"
@ -793,64 +839,32 @@ name = "deepsearch-glm"
 version = "0.26.2"
 description = "Graph Language Models"
 optional = false
-python-versions = "<4.0,>=3.9"
-files = [
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:00453a02bc8df959da576bc598ba528b394a9c016d6a428efc948c867be98938"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9e6f654ab4d9dc3e6e2033c9c45294c36e5e62650cac0e4a650af576364eb370"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:1fdf2fce9d642bbc5222600a1b280a7413aa640ed01acee13d43401ec27d6ad5"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:218cab085a58b88c55dbeb80cc5f5f7b3c5a96c8537eb2ada8e5cab70cd8e439"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75be007e62d11780f2433b213dad14d14a270c3607e909fd1fc95efdf02446c6"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a9b34c6cfb8b873ccf6e0072f5434c0c65a1d90652a6b901becc5b3b1695106"},
-    {file = "deepsearch_glm-0.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:f4b63c6e1d4a7be597efbe96052286bca805784cd7283a037919c349971051c5"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:eaabedca45fdd87dc455dc08b1785db15ba5ea6b706820330447f2cf7f03a67a"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:101bc2a79027df555050d08112717249916c4d82ad5815be2a1ac0581d9ab2b5"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:000d4a4895c4ff89c465b746bb7db3bb054a1fb5c3fabe2772d5431700c15d33"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2d97f9ebdff1a9086cc32ddd0abb14b42c4b4b2ae666986078fd77db3aa4487d"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:666a3b53b0949735cff77a8209f2833866e34b635ca0c7f444807963d8379d93"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aae1ec83222ef39e045f0186023473e5ce2ed30846c13f2943192d34d57c0f"},
-    {file = "deepsearch_glm-0.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:9bb173dcd0caef1d8a0d440e1ac3e9959c6b849e06b95b1d9b436661504c98f7"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:bb286be157a7b163b46a4d1f7e48a30d5cc365d4926c18e8b3c72994a8f296f7"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:defca9ecf1451ce3422b7783ea188571ffad7c941dbf52acc2638c5a4ffa7743"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:226f8862c616a4def202a6d0f71eb5d8e9f6ddbded2cf431c146150303888cf8"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:6ff0fe662254835763ad7d3edc2db320de8d233f645064e0356187d8e1fabe3b"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c1b84ec5b1308de37c660f49570ee1e72bd7f0f607566344446b9293f1183c"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d634eeaae8943e1912c0dfbf3193e09bea8c1aac38db8a6fa1f03fe6a49cb84"},
-    {file = "deepsearch_glm-0.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:9294087d26037574817e8e1710e387fd9ef9ba4328705de86dd40d819f32909a"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:df7181143c62a1f0e166bc9ffb25deab617b53ba7c468284e3072b861c17405a"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:2c3fef2c8394d6dc22d1bcdab12d0f46df9b411c5431dfb585a2c7bb128e1744"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f641a88421aa806ccef8f8e657fbb65135f59732110d21b5103c09138a659315"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:cf78499892caffb4bdc020b8c50ab7d623f568478375dcc2e3ec107d40972adc"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72f2b432b81b0bc7c87e33c41a97c7a8da2536dd2b337eb1b7d054fba12d556"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4703cae0d329b77e1d97892910313035204daa026d6e67ce6eb1b3e74e41f93e"},
-    {file = "deepsearch_glm-0.26.2-cp313-cp313-win_amd64.whl", hash = "sha256:c906c75d080414490727de416fd1782bc6a10301378f72a741aa227b183832cf"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:10a366512540eff9f76645eb521df3469a160e8460ff6c3c1bfe172342c6c670"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:304988f1e08bd86a8a7b7cc0495e38faf586231f33f05c1023597c6177758572"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:c8f69b877846031648811ff80070b90b834bf9e4cdd74e5c2d93c7e18f408cd1"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:1ba12361d1e4b8b02a72f515028f22686d98526a703a1091f89e9487fa3aa3c7"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c03bb8b3cdb2952c9c269849830f7830fa7e0384b76809e25f4c2d5d091f746c"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fe719b26d7cfcf5632a56be1f1420920fcdbea4418c014dd6e7e218dd2aca11"},
-    {file = "deepsearch_glm-0.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2b31fa419287af3429efc2d5610cbf2428bafc762e45b610a48ad30dffedaa9e"},
-    {file = "deepsearch_glm-0.26.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6df2504998e60c1aac3655820ad25e5eccca137da2e9f78fb53dc0fd0d1cdbf4"},
-    {file = "deepsearch_glm-0.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1b4a789ec9555ec9f4ff6730d68081be37eaa43cb51c9463962967c9f672684"},
-    {file = "deepsearch_glm-0.26.2.tar.gz", hash = "sha256:7a607e78903b66d28beac3408156c11ab7b34ee70e8ccd0d292b28433e5a9c1d"},
-]
+python-versions = "^3.9"
+files = []
+develop = false

 [package.dependencies]
-docling-core = ">=2.0,<3.0"
+docling-core = "^2.0"
 docutils = "!=0.21"
 numpy = ">=1.24.4,<3.0.0"
 pandas = ">=1.5.1,<3.0.0"
-python-dotenv = ">=1.0.0,<2.0.0"
-pywin32 = {version = ">=307,<308", markers = "sys_platform == \"win32\""}
-requests = ">=2.32.3,<3.0.0"
-rich = ">=13.7.0,<14.0.0"
+python-dotenv = "^1.0.0"
+pywin32 = {version = "^307", markers = "sys_platform == \"win32\""}
+requests = "^2.32.3"
+rich = "^13.7.0"
 tabulate = ">=0.8.9"
-tqdm = ">=4.64.0,<5.0.0"
+tqdm = "^4.64.0"

 [package.extras]
 pyplot = ["matplotlib (>=3.7.1,<4.0.0)"]
 toolkit = ["deepsearch-toolkit (>=1.1.0,<2.0.0)"]

+[package.source]
+type = "git"
+url = "ssh://git@github.com/DS4SD/deepsearch-glm.git"
+reference = "cau/layout-processing-children-payloads"
+resolved_reference = "8fac776c07fb7541d17ebc9db48c9900074f25b1"
+
 [[package]]
 name = "defusedxml"
 version = "0.7.1"
@ -893,94 +907,74 @@ name = "docling-core"
 version = "2.6.1"
 description = "A python library to define and validate data types in Docling."
 optional = false
-python-versions = "<4.0,>=3.9"
-files = [
-    {file = "docling_core-2.6.1-py3-none-any.whl", hash = "sha256:8e7a5bc0ce13289567738481949fed3ab580f2d8cea7525b246159233d81b26b"},
-    {file = "docling_core-2.6.1.tar.gz", hash = "sha256:c8af45e0873611120cc24757d567d37e053a54e2ce060b7b5b44efd0d73f75e5"},
-]
+python-versions = "^3.9"
+files = []
+develop = false

 [package.dependencies]
-jsonref = ">=1.1.0,<2.0.0"
-jsonschema = ">=4.16.0,<5.0.0"
-pandas = ">=2.1.4,<3.0.0"
-pillow = ">=10.3.0,<11.0.0"
+jsonref = "^1.1.0"
+jsonschema = "^4.16.0"
+pandas = "^2.1.4"
+pillow = "^10.3.0"
 pydantic = ">=2.6.0,<2.10"
 pyyaml = ">=5.1,<7.0.0"
-tabulate = ">=0.9.0,<0.10.0"
-typing-extensions = ">=4.12.2,<5.0.0"
+tabulate = "^0.9.0"
+typing-extensions = "^4.12.2"
+
+[package.source]
+type = "git"
+url = "ssh://git@github.com/DS4SD/docling-core.git"
+reference = "feat-add-legacy-convert"
+resolved_reference = "4434b1073dc15fefb75f28c37299abd32d9c532f"

 [[package]]
 name = "docling-ibm-models"
 version = "2.0.7"
 description = "This package contains the AI models used by the Docling PDF conversion package"
 optional = false
-python-versions = "<4.0,>=3.9"
-files = [
-    {file = "docling_ibm_models-2.0.7-py3-none-any.whl", hash = "sha256:bf362add22e9c526ac56c04bce412d7bb1c331b44a73204abba0b1d90a500c78"},
-    {file = "docling_ibm_models-2.0.7.tar.gz", hash = "sha256:e1372c4f2517d522125fb02a820558f01914926f532bcd0534f1028a25d63667"},
-]
+python-versions = "^3.9"
+files = []
+develop = false

 [package.dependencies]
+accelerate = "^1.1.1"
 huggingface_hub = ">=0.23,<1"
-jsonlines = ">=3.1.0,<4.0.0"
+jsonlines = "^3.1.0"
 numpy = ">=1.24.4,<3.0.0"
-opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
-Pillow = ">=10.0.0,<11.0.0"
-torch = ">=2.2.2,<3.0.0"
-torchvision = ">=0,<1"
-tqdm = ">=4.64.0,<5.0.0"
+opencv-python-headless = "^4.6.0.66"
+Pillow = "^10.0.0"
+torch = "^2.2.2"
+torchvision = "^0"
+tqdm = "^4.64.0"
+transformers = "^4.46.2"
+
+[package.source]
+type = "git"
+url = "ssh://git@github.com/DS4SD/docling-ibm-models.git"
+reference = "nli/performance"
+resolved_reference = "c1bed7d5451ee16b7fb5b0bc5e847f599ed93aa7"

 [[package]]
 name = "docling-parse"
 version = "2.1.2"
 description = "Simple package to extract text with coordinates from programmatic PDFs"
 optional = false
-python-versions = "<4.0,>=3.9"
-files = [
-    {file = "docling_parse-2.1.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:140319e3eac73f9768d35313739891ae637af57fda03eade17d90e2d28ad80eb"},
-    {file = "docling_parse-2.1.2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:cec968a436ad14e8a45a72fc0e0074750eee28548a14f3c3df5157a68ac958e7"},
-    {file = "docling_parse-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:c84eba992fee49d190cf4834fd44ef4e6549c3f1fcd41b91622114703a7e4a87"},
-    {file = "docling_parse-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:ae02af07f3dd335f56383a83efdc1f6450b7d38e21e1131005dbd341eb38e47d"},
-    {file = "docling_parse-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fa0731e97d2644ff8a3257ae53208b88be3ddc6a4bc54fbe39e21f8395530f0"},
-    {file = "docling_parse-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26d60136aab5f4a3a773922a8dcc530334165331660d074cd88dcd5d91206cd"},
-    {file = "docling_parse-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:76eef41d50017c2fc531face44c1a35bef66095951622617d0f281e35d18e9e0"},
-    {file = "docling_parse-2.1.2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:7f1ad037d3ac0d80252c493e73b12688ded3ece9bae7954ba62765506c139d21"},
-    {file = "docling_parse-2.1.2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:9f1360c0558c84f4b6633b0882256f6d621fd9e52179acae39c727a43b48d937"},
-    {file = "docling_parse-2.1.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:5d505c2d3e9eff4f3064b4d1f017a3c6577b5d8ba55540d558f4899561862956"},
-    {file = "docling_parse-2.1.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:58f552f61ac35c02890b03fe59b06552353314c3c1ee2a050c68a8a206ab1b4b"},
-    {file = "docling_parse-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22069dadcfdcebc02e36e27f80d452f1265a5a97d894f2391490bf099bc5432c"},
-    {file = "docling_parse-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68942b31684a021e27b9b07d27ed139911444b33963f7e0b5d2dbda8aaa5cb1"},
-    {file = "docling_parse-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:d87e3fbf1549cd8bc171240c18584ba8c32f83963b5af66b2a70a2bc3af56d2e"},
-    {file = "docling_parse-2.1.2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:5b00b81fa8eb0b34621f1ef9d07623d7dbcc354a33295a5b0c4209c39b1ff8eb"},
-    {file = "docling_parse-2.1.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:1b99b122f941d0f19e92a215e589b94f49db899c5eec0147e83824652b18ce74"},
-    {file = "docling_parse-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:744fe368a8fa49778e881c1052427c38a7d0e367273fcdef493e047513783108"},
-    {file = "docling_parse-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b8a3e558a96f7d593269be75ba4147ebe221f5edad3d41244cef3533e8a51b74"},
-    {file = "docling_parse-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afcf53bce8c91886c1360e625e51d15ebfb36d37cd53b6e019e86ce1118c1d0c"},
-    {file = "docling_parse-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89d25fc4fb8f16a8ed5bc8c4f00a77739d2536732c0ddae16340b1859adf68fd"},
-    {file = "docling_parse-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:28a7f49a865a0cd71033a7899aac00c7d2e3b6c3a76488f8676ba0fc353d9f3a"},
-    {file = "docling_parse-2.1.2-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:ad1560532cdf15dcb4a6005c8b7fe19def0e910e6125863f14978d6d07a1ba47"},
-    {file = "docling_parse-2.1.2-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:19003b1bb64cd5a40999a3c5ffcb9a9d9608a073949b76acc58d58fb5054ea03"},
-    {file = "docling_parse-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:041bf1c72a23d62e2dd30dcc3508222f6674e85b0f1d19a3196fd6d7b5f56015"},
-    {file = "docling_parse-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:12403c26e833d8fdf0f406d2895f5108fd07b64a4d929c9105ca60f09b882c34"},
-    {file = "docling_parse-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1082e227af3e31085eff3e96103b09becdf95324304e17ce0b1b61c43b93fbb7"},
-    {file = "docling_parse-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77b36e36d1e07a06a1616ee281079d6b972c3059f2fa02dafcfc225a41e5bd1a"},
-    {file = "docling_parse-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:4300df86657935b0109c44702857ebf3d0713f1bbe376982f369504a762e2fef"},
-    {file = "docling_parse-2.1.2-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:912fe44507f209d997e1183f38a71d4e14c31d53a164fb862631822624dad892"},
-    {file = "docling_parse-2.1.2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:10ff1928b12099f446fcd0b043182173e6b02ce74008ea6ce921d56cdee8964e"},
-    {file = "docling_parse-2.1.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:391ad31a4086fabbc290851432f4cf0bdc366e07a454adf49e42029898d6b477"},
-    {file = "docling_parse-2.1.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:ebf478e99c0c16d7dad30c0fdb1f5e236ae94d48da8dec48dbe5f0841eead4ed"},
-    {file = "docling_parse-2.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b1c904017330d096981b7db6b225b66aff1cebdc422843103a782121d6e8be8"},
-    {file = "docling_parse-2.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc8ec6ad1bec6168991b895d749b222bef14b568d1d9f6c06efaeb1645dfe12"},
-    {file = "docling_parse-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:e6eb130aa367247e1f32225bb1608cee901d711b475527404bbc4330c9199b99"},
-    {file = "docling_parse-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ef88d565c761b48f8a175fd474e068c0da9d4401e22d3e38de73e2f00f3df2d1"},
-    {file = "docling_parse-2.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:bdc8ccbdc4ab91b829b8c421ad89da276442a2c891eda1f6507f248d0bd8dff9"},
-    {file = "docling_parse-2.1.2.tar.gz", hash = "sha256:3c249f50e6351eb6126331a179fe86b64dc2073e9f881d52f8c8fb391633b89e"},
-]
+python-versions = "^3.9"
+files = []
+develop = false

 [package.dependencies]
+autoflake = "^2.3.1"
+pillow = "^10.4.0"
 pywin32 = {version = ">=305", markers = "sys_platform == \"win32\""}
 tabulate = ">=0.9.0,<1.0.0"

+[package.source]
+type = "git"
+url = "ssh://git@github.com/DS4SD/docling-parse.git"
+reference = "dev/expose-cell-sanitisation-via-python"
+resolved_reference = "8ea65ae3080db88f54f8a3f7b622e7b002c9b7f0"
+
 [[package]]
 name = "docutils"
 version = "0.21.2"
@ -3192,6 +3186,7 @@ files = [
    {file = "nh3-0.2.19-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00810cd5275f5c3f44b9eb0e521d1a841ee2f8023622de39ffc7d88bd533d8e0"},
    {file = "nh3-0.2.19-cp38-abi3-win32.whl", hash = "sha256:7e98621856b0a911c21faa5eef8f8ea3e691526c2433f9afc2be713cb6fbdb48"},
    {file = "nh3-0.2.19-cp38-abi3-win_amd64.whl", hash = "sha256:75c7cafb840f24430b009f7368945cb5ca88b2b54bb384ebfba495f16bc9c121"},
+    {file = "nh3-0.2.19.tar.gz", hash = "sha256:790056b54c068ff8dceb443eaefb696b84beff58cca6c07afd754d17692a4804"},
 ]

 [[package]]
@ -3766,9 +3761,9 @@ numpy = [
    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
    {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
    {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
-    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
 ]

 [[package]]
@ -3792,9 +3787,9 @@ numpy = [
    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
    {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
    {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
-    {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
 ]

 [[package]]
@ -5474,12 +5469,12 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""}

 [[package]]
 name = "rapidocr-onnxruntime"
-version = "1.4.0"
+version = "1.4.1"
 description = "A cross platform OCR Library based on OnnxRuntime."
 optional = true
 python-versions = "<3.13,>=3.6"
 files = [
-    {file = "rapidocr_onnxruntime-1.4.0-py3-none-any.whl", hash = "sha256:d21c4ba2ef80b7a8ecf8178632f273398a92ab44a1ffb9e171139ef2a589d690"},
+    {file = "rapidocr_onnxruntime-1.4.1-py3-none-any.whl", hash = "sha256:5ecdb8f4f3beec56630197f87c3e67ab744fce0cc66394b7b1da08c8c96a727f"},
 ]

 [package.dependencies]
@ -5700,112 +5695,114 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]

 [[package]]
 name = "rpds-py"
-version = "0.22.0"
+version = "0.22.1"
 description = "Python bindings to Rust's persistent data structures (rpds)"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "rpds_py-0.22.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a4366f264fa60d3c109f0b27af0cd9eb8d46746bd70bd3d9d425f035b6c7e286"},
-    {file = "rpds_py-0.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e34a3e665d38d0749072e6565400c8ce9abae976e338919a0dfbfb0e1ba43068"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38cacf1f378571450576f2c8ce87da6f3fddc59d744de5c12b37acc23285b1e1"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8cbb040fec8eddd5a6a75e737fd73c9ce37e51f94bacdd0b178d0174a4758395"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d80fd710b3307a3c63809048b72c536689b9b0b31a2518339c3f1a4d29c73d7a"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b5d17d8f5b885ce50e0cda85f99c0719e365e98b587338535fa566a48375afb"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7a048ec1ebc991331d709be4884dc318c9eaafa66dcde8be0933ac0e702149"},
-    {file = "rpds_py-0.22.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:306da3dfa174b489a3fc63b0872e2226a5ddf94c59875a770d72aff945d5ed96"},
-    {file = "rpds_py-0.22.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c7b4450093c0c909299770226fb0285be47b0a57545bae25b5c4e51566b0e587"},
-    {file = "rpds_py-0.22.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0903ffdb5b9007e503203b6285e4ff0faf96d875c19f1d103b475acf7d9f7311"},
-    {file = "rpds_py-0.22.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d1522025cda9e57329aade769f56e5793b2a5da7759a21914ee10e67e17e601e"},
-    {file = "rpds_py-0.22.0-cp310-cp310-win32.whl", hash = "sha256:49e084d47a66027ac72844f9f52f13d347a9a1f05d4f84381b420e47f836a7fd"},
-    {file = "rpds_py-0.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:d9ceca96df54cb1675a0b7f52f1c6d5d1df62c5b40741ba211780f1b05a282a2"},
-    {file = "rpds_py-0.22.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:771c9a3851beaa617d8c8115d65f834a2b52490f42ee2b88b13f1fc5529e9e0c"},
-    {file = "rpds_py-0.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:341a07a4b55126bfae68c9bf24220a73d456111e5eb3dcbdab9fd16de2341224"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7649c8b8e4bd1ccc5fcbd51a855d57a617deeba19c66e3d04b1abecc61036b2"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f513758e7cda8bc262e80299a8e3395d7ef7f4ae705be62632f229bc6c33208"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba1fc34d0b2f6fd53377a4c954116251eba6d076bf64f903311f4a7d27d10acd"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:632d2fdddd9fbe3ac8896a119fd18a71fc95ca9c4cbe5223096c142d8c4a2b1d"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:326e42f2b49462e05f8527a1311ce98f9f97c484b3e443ec0ea4638bed3aebcf"},
-    {file = "rpds_py-0.22.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9bbdba9e75b1a9ee1dd1335034dad998ef1acc08492226c6fd50aa773bdfa7d"},
-    {file = "rpds_py-0.22.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:41f65a97bf2c4b161c9f8f89bc37058346bec9b36e373c8ad00a16c957bff625"},
-    {file = "rpds_py-0.22.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0686f2c16eafdc2c6b4ce6e86e5b3092e87db09ae64be2787616444eb35b9756"},
-    {file = "rpds_py-0.22.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4e7c9aa2353eb0b0d845323857197daa036c2ff8624df990b0d886d22a8f665e"},
-    {file = "rpds_py-0.22.0-cp311-cp311-win32.whl", hash = "sha256:2d2fc3ab021be3e0b5aec6d4164f2689d231b8bfc5185cc454314746aa4aee72"},
-    {file = "rpds_py-0.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:87453d491369cd8018016d2714a13e8461975161703c18ee31eecf087a8ae5d4"},
-    {file = "rpds_py-0.22.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e9d4293b21c69ee4f9e1a99ac4f772951d345611c614a0cfae2ec6b565279bc9"},
-    {file = "rpds_py-0.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:67e013a17a3db4d98cc228fd5aeb36a51b0f5cf7330b9102a552060f1fe4e560"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b639a19e1791b646d27f15d17530a51722cc728d43b2dff3aeb904f92d91bac"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1357c3092702078b7782b6ebd5ba9b22c1a291c34fbf9d8f1a48237466ac7758"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:842855bbb113a19c393c6de5aa6ed9a26c6b13c2fead5e49114d39f0d08b94d8"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ae7927cd2b869ca4dc645169d8af5494a29c99afd0ea0f24dd00c811ab1d8b8"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91bfef5daa2a5a4fe62f8d317fc91a626073639f951f851bd2cb252d01bc6c5"},
-    {file = "rpds_py-0.22.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fc4824e38c1e91a73bc820e7caacaf19d0acd557465aceef0420ca59489b390"},
-    {file = "rpds_py-0.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:92d28a608127b357da47c99e0d0e0655ca2060286540fe9f2a25a2e8ac666e05"},
-    {file = "rpds_py-0.22.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c637188b930175c256f13adbfc427b83ec7e64476d1ec9d6608f312bb84e06c3"},
-    {file = "rpds_py-0.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93bbd66f46dddc41e8c656130c97c0fb515e0fa44e1eebb2592769dbbd41b2f5"},
-    {file = "rpds_py-0.22.0-cp312-cp312-win32.whl", hash = "sha256:54d8f94dec5765a9edc19610fecf0fdf9cab36cbb9def1213188215f735a6f98"},
-    {file = "rpds_py-0.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:931bf3d0705b2834fed29354f35170fa022fe22a95542b61b7c66aca5f8a224f"},
-    {file = "rpds_py-0.22.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2a57300cc8b034c5707085249efd09f19116bb80278d0ec925d7f3710165c510"},
-    {file = "rpds_py-0.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c398a5a8e258dfdc5ea2aa4e5aa2ca3207f654a8eb268693dd1a76939074a588"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6cc4eb1e86364331928acafb2bb41d8ab735ca3caf2d6019b9f6dac3f4f65d"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:574c5c94213bc9990805bfd7e4ba3826d3c098516cbc19f0d0ef0433ad93fa06"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c0321bc03a1c513eca1837e3bba948b975bcf3a172aebc197ab3573207f137a"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d276280649305c1da6cdd84585d48ae1f0efa67434d8b10d2df95228e59a05bb"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c17b43fe9c6da16885e3fe28922bcd1a029e61631fb771c7d501019b40bcc904"},
-    {file = "rpds_py-0.22.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:48c95997af9314f4034fe5ba2d837399e786586e220835a578d28fe8161e6ae5"},
-    {file = "rpds_py-0.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e9aa4af6b879bb75a3c7766fbf49d77f4097dd12b548ecbbd8b3f85caa833281"},
-    {file = "rpds_py-0.22.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8426f97117b914b9bfb2a7bd46edc148e8defda728a55a5df3a564abe70cd7a4"},
-    {file = "rpds_py-0.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:034964ea0ea09645bdde13038b38abb14be0aa747f20fcfab6181207dd9e0483"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:3dc7c64b56b82428894f056e9ff6e8ee917ff74fc26b65211a33602c2372e928"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:1212cb231f2002934cd8d71a0d718fdd9d9a2dd671e0feef8501038df3508026"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f21e1278c9456cd601832375c778ca44614d3433996488221a56572c223f04a"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:875fe8dffb43c20f68379ee098b035a7038d7903c795d46715f66575a7050b19"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e23dcdd4b2ff9c6b3317ea7921b210d39592f8ca1cdea58ada25b202c65c0a69"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0fb8efc9e579acf1e556fd86277fecec320c21ca9b5d39db96433ad8c45bc4a"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe23687924b25a2dee52fab15976fd6577ed8518072bcda9ff2e2b88ab1f168b"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5469b347445d1c31105f33e7bfc9a8ba213d48e42641a610dda65bf9e3c83f5"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a810a57ce5e8ecf8eac6ec4dab534ff80c34e5a2c31db60e992009cd20f58e0f"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d9bb9242b38a664f307b3b897f093896f7ed51ef4fe25a0502e5a368de9151ea"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b4660943030406aaa40ec9f51960dd88049903d9536bc3c8ebb5cc4e1f119bbe"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-win32.whl", hash = "sha256:208ce1d8e3af138d1d9b21d7206356b7f29b96675e0113aea652cf024e4ddfdc"},
-    {file = "rpds_py-0.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e6da2e0500742e0f157f005924a0589f2e2dcbfdd6cd0cc0abce367433e989be"},
-    {file = "rpds_py-0.22.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:f980a0640599a74f27fd9d50c84c293f1cb7afc2046c5c6d3efaf8ec7cdbc326"},
-    {file = "rpds_py-0.22.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ca505fd3767a09a139737f3278bc8a485cb64043062da89bcba27e2f2ea78d33"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba235e00e0878ba1080b0f2a761f143b2a2d1c354f3d8e507fbf2f3de401bf18"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:81e7a27365b02fe70a77f1365376879917235b3fec551d19b4c91b51d0bc1d07"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32a0e24cab2daae0503b06666d516e90a080c1a95aff0406b9f03c6489177c4b"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a73ed43d64209e853bba567a543170267a5cd64f359540b0ca2d597e329ba172"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0abcce5e874474d3eab5ad53be03dae2abe651d248bdeaabe83708e82969e78"},
-    {file = "rpds_py-0.22.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f4e9946c8c7def17e4fcb5eddb14c4eb6ebc7f6f309075e6c8d23b133c104607"},
-    {file = "rpds_py-0.22.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:758098b38c344d9a7f279baf0689261777e601f620078ef5afdc9bd3339965c3"},
-    {file = "rpds_py-0.22.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:9ad4640a409bc2b7d22b7921e7660f0db96c5c8c69fbb2e8f3261d4f71d33983"},
-    {file = "rpds_py-0.22.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8c48fc7458fe3a74dcdf56ba3534ff41bd421f69436df09ff3497fdaac18b431"},
-    {file = "rpds_py-0.22.0-cp39-cp39-win32.whl", hash = "sha256:fde778947304e55fc732bc8ea5c6063e74244ac1808471cb498983a210aaf62c"},
-    {file = "rpds_py-0.22.0-cp39-cp39-win_amd64.whl", hash = "sha256:5fdf91a7c07f40e47b193f2acae0ed9da35d09325d7c3c3279f722b7cbf3d264"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c8fd7a16f7a047e06c747cfcf2acef3ac316132df1c6077445b29ee6f3f3a70b"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b6e4bcfc32f831bfe3d6d8a5acedfbfd5e252a03c83fa24813b277a3a8a13ca"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eadd2417e83a77ce3ae4a0efd08cb0ebdfd317b6406d11020354a53ad458ec84"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9dc2113e0cf0dd637751ca736186fca63664939ceb9f9f67e93ade88c69c0c9"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc2c00acdf68f1f69a476b770af311a7dc3955b7de228b04a40bcc51ac4d743b"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dfdabdf8519c93908b2bf0f87c3f86f9e88bab279fb4acfd0907519ca5a1739f"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8338db3c76833d02dc21c3e2c42534091341d26e4f7ba32c6032bb558a02e07b"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8ad4dfda52e64af3202ceb2143a62deba97894b71c64a4405ee80f6b3ea77285"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:3b94b074dcce39976db22ea75c7aea8b22d95e6d3b62f76e20e1179a278521d8"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:d4f2af3107fe4dc40c0d1a2409863f5249c6796398a1d83c1d99a0b3fa6cfb8d"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:bb11809b0de643a292a82f728c494a2bbef0e30a7c42d37464abbd6bef7ca7b1"},
-    {file = "rpds_py-0.22.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c1c21030ed494deb10226f90e2dbd84a012d59810c409832714a3dd576527be2"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:64a0c965a1e299c9b280006bdb15c276c427c45360aed676305dc36bcaa4d13c"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2498ff422823be087b48bc82710deb87ac34f6b7c8034ee39920647647de1e60"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59e63da174ff287db05ef7c21d75974a5bac727ed60452aeb3a14278477842a8"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1c04fb380bc8efaae2fdf17ed6cd5d223da78a8b0b18a610f53d4c5d6e31dfd"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e04919ffa9a728c446b27b6b625fa1d00ece221bdb9d633e978a7e0353a12c0e"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24c28df05bd284879d0fac850ba697077d2a33b7ebcaea6318d6b6cdfdc86ddc"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33622dc63c295788eed09dbb1d11bed178909d3267b02d873116ee6be368244"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7539dbb8f705e13629ba6f23388976aad809e387f32a6e5c0712e4e8d9bfcce7"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:b8906f537978da3f7f0bd1ba37b69f6a877bb43312023b086582707d2835bf2f"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:62ab12fe03ffc49978d29de9c31bbb216610157f7e5ca8e172fed6642aead3be"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:762206ba3bf1d6c8c9e0055871d3c0d5b074b7c3120193e6c067e7866f106ab1"},
-    {file = "rpds_py-0.22.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed0102146574e5e9f079b2e1a06e6b5b12a691f9c74a65b93b7f3d4feda566c6"},
-    {file = "rpds_py-0.22.0.tar.gz", hash = "sha256:32de71c393f126d8203e9815557c7ff4d72ed1ad3aa3f52f6c7938413176750a"},
+    {file = "rpds_py-0.22.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ab27dd4edd84b13309f268ffcdfc07aef8339135ffab7b6d43f16884307a2a48"},
+    {file = "rpds_py-0.22.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9d5b925156a746dc1f5f52376fdd1fbdd3f6ffe1fcd6f5e06f77ca79abb940a3"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201650b309c419143775c15209c620627de3c09a27c7fb58375325aec5cce260"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31264187fc934ff1024a4f56775f33c9252d3f4f3e27ec07d1995a26b52702c3"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97c5ffe47ccf92d8b17e10f8a5ce28d015aa1196edc3359684cf31504eae6a14"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9ac7280bd045f472b50306d7efeee051b69e3a2dd1b90f46bd7e86e63b1efa2"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f941fb86195f97be7f6efe04a21b223f05dfe4d1dfb159999e2f8d101e44cc4"},
+    {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f91bfc39f7a64168e08ab831fa497ec5438c1d6c6e2f9e12848d95ad11ac8523"},
+    {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:effcae2152afe7937a28376dbabb25c770ef99ed4e16a4ffeb8e6a4f7c4f06aa"},
+    {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2177e59c033bf0d1bf7de1ced561205963583caf3242c6c700a723034bfb5f8e"},
+    {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:66f4f48a89cdd30ab3a47335df81c76e9a63799d0d84b29c0618371c66fa37b0"},
+    {file = "rpds_py-0.22.1-cp310-cp310-win32.whl", hash = "sha256:b07fa9e634234e84096adfa4be3828c8f26e238679c122824b2b3d7131bec578"},
+    {file = "rpds_py-0.22.1-cp310-cp310-win_amd64.whl", hash = "sha256:ca4657e9fd0b1b5376942d403d634ce188f79064f0873aa853ab05b10185ceec"},
+    {file = "rpds_py-0.22.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:608c84699b2db09c6a8743845b1a3dad36fae53eaaecb241d45b13dff74405fb"},
+    {file = "rpds_py-0.22.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9dae4eb9b5534e09ba6c6ab496a757e5e394b7e7b08767d25ca37e8d36491114"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a1f000c5f6e08b298275bae00921e9fbbf2a35dae0a86db2821c058c2201a9"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:580ccbf11f02f948add4cb641843030a89f1463d7c0740cbfc9aca91e9dc34b3"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96559e05bdf938b2048353e10a7920b98f853cefe4482c2064a718d7d0a50bd7"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128cbaed7ba26116820bcb992405d6a13ea18c8fca1b8c4f59906d858e91e979"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:734783dd7da58f76222f458346ddebdb3621686a1a2a667db5049caf0c9956b9"},
+    {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c9ce6b83597d45bec44a2690857ede62fc98223772135f8a7fa90884eb726501"},
+    {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bca4428c4a957b78ded3e6e62884ab03f029dce8fa8d34818da0f80f61332b49"},
+    {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1ded65691a1d3fd7d2aa89d2c91aa51f941601bb2ce099739909034d957fef4b"},
+    {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72407065ad459db9f3d052ea8c51e02534f02533fc61e51cbab3bd94166f086c"},
+    {file = "rpds_py-0.22.1-cp311-cp311-win32.whl", hash = "sha256:eb013aa01b404219f28dc973d9e6310fd4db216d7299253dd355629952e0564e"},
+    {file = "rpds_py-0.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:8bd9ec1db79a664f4cbb12878693b73416f4d2cb425d3e27eccc1bdfbdc826ef"},
+    {file = "rpds_py-0.22.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8ec41049c90d204a6561238a9ad6c7263ebb7009d9759c98b58078d9d2fec9ba"},
+    {file = "rpds_py-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:102be79c4cc47a4aeb5912401185c404cd2601c15a7163bbecff7f1bfe20b669"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a603155db408f773637f9e3a712c6e3cbc521aaa8fa2b99f9ba6106c59a2496"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5dbff9402c2bdf00bf0df9905694b3c292a3847c725651938a72f554351a5fcb"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96b3759d8ab2323324e0a92b2f44834f9d88089b8d1ab6f533b61f4be3411cef"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3029f481b31f329b1fdb4ec4b56935d82210ddd9c6f86ea5a87c06f1e97b161"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d280b4bf09f719b89fd9aab3b71067acc0d0449b7d1eba99a2ade4939cef8296"},
+    {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8e97e19aa7b0b0d801a159f932ce4435f1049c8c38e2bb372bb5bee559ce50"},
+    {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:50e4b5d291105f7063259fe0125b1af902fb34499444d7c5c521dd8328b00939"},
+    {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d3777c446bb1c5fcd82dc3f8776e1a146cd91e80cc1892f8634575ace438d22f"},
+    {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:447ae1104fb32197b9262f772d565d38e834cc2e9edd89350b37b88fed636e70"},
+    {file = "rpds_py-0.22.1-cp312-cp312-win32.whl", hash = "sha256:55d371b9d8b0c2a68a50413a8cb01c3c3ce1ea4f768bf77b66669a9a486e101e"},
+    {file = "rpds_py-0.22.1-cp312-cp312-win_amd64.whl", hash = "sha256:413a30a99d8683dace3765885920ed27ab662efbb6c98d81db76c397ad1ffd71"},
+    {file = "rpds_py-0.22.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:aa2ba0176037c915d8660a4e46581d645e2c22b5373e466bc8640a794d45861a"},
+    {file = "rpds_py-0.22.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4ba6c66fbc6015b2f99e7176fec41793cecb00c4cc357cad038dff85e6ac42ab"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15fa4ca658f8ad22645d3531682b17e5580832efbfa87304c3e62214c79c1e8a"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7833ef6f5d6cb634f296abfd93452fb3eb44c4e9a6ae95c1021eab704c1cee2"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0467838c90435b80793cde486a318fc916ee57f2af54e4b10c72b20cbdcbaa9"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d962e2e89b3a95e3597a34b8c93ced1e98958502c5b8096c9fd69deff279f561"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ce729f1dc8a4a190c34b69f75377bddc004079b2963ab722ab91fafe040be6d"},
+    {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8080467df22feca0fc9c46567001777c6fbc2b4a2683a7137420896051874ca1"},
+    {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0f9eb37d3a60b262a98ab51ee899cac039de9ca0ce68dcf1a6518a09719020b0"},
+    {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:153248f48d6f90a295a502f53ec544a3ffbd21b0bb32f5dca39c4b93a764d6a2"},
+    {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0a53592cdf98cec3dfcdb24ffec8a4797e7656b65700099af43ec7df023b6de4"},
+    {file = "rpds_py-0.22.1-cp313-cp313-win32.whl", hash = "sha256:e8056adcefa2dcb67e8bc91ea5eee26df66e8b297a8cd6ff0903f85c70908fa0"},
+    {file = "rpds_py-0.22.1-cp313-cp313-win_amd64.whl", hash = "sha256:a451dba533be77454ebcffc85189108fc05f279100835ac76e7989edacb89156"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:2ea23f1525d4f64286dbe0947c929d45c3ffe963b2dbed1d3844a2e4938bda42"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3aaa22487477de9618ce3b37f99fbe81219ba96f3c2ca84f576f0ab451b83aba"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8954b9ffe60f479a0c0ba40987db2546c735ab02a725ea7fd89342152d4d821d"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c8502a02ae3ae67084f5a0bf5a8253b19fa7a887f824e41e016cdb0ac532a06f"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a083221b6a4ecdef38a60c95d8d3223d99449cb4da2544e9644958dc16664eb9"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:542eb246d5be31b5e0a9c8ddb9539416f9b31f58f75bd4ee328bff2b5c58d6fd"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffae97d28ea4f2c613a751d087b75a97fb78311b38cc2e9a2f4587e473ace167"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0ff8d5b13ce2357fa8b33a0a2e3775aa71df5bf7c8ba060634c9d15ab12f357"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0f057a0c546c42964836b209d8de9ea1a4f4b0432006c6343cbe633d8ca14571"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:48ee97c7c6027fd423058675b5a39d0b5f7a1648250b671563d5c9f74ff13ff0"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:babec324e8654a59122aaa66936a9a483faa03276db9792f51332475c2dddc4a"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-win32.whl", hash = "sha256:e69acdbc132c9592c8dc393af85e38e206ca847c7019a953ff625191c3a12312"},
+    {file = "rpds_py-0.22.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c783e4ed68200f4e03c125690d23158b1c49c4b186d458a18debc109bbdc3c2e"},
+    {file = "rpds_py-0.22.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2143c3aed85992604d758bbe67da839fb4aab3dd2e1c6dddab5b3ca7162b34a2"},
+    {file = "rpds_py-0.22.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f57e2d0f8022783426121b586d7c842ea40ea832a29e28ca36c881b54c74fb28"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c0c324879d483504b07f7b18eb1b50567c434263bbe4866ecce33056162668a"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c40e02cc4f3e18fd39344edb10eebe04bd11cfd13119606b5771e5ea51630d3"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f76c6f319e57007ad52e671ec741d801324760a377e3d4992c9bb8200333ebac"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5cae9b415ea8a6a563566dbf46650222eccc5971c7daa16fbee63aef92ae543"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b09209cdfcacf5eba9cf80367130532e6c02e695252e1f64d3cfcc2356e6e19f"},
+    {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dbe428d0ac6eacaf05402adbaf137f59ad6063848182d1ff294f95ce0f24005b"},
+    {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:626b9feb01bff049a5aec4804f0c58db12585778b4902e5376a95b01f80a7a16"},
+    {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec1ccc2a9f764cd632fb8ab28fdde166250df54fc8d97315a4a6948dc5367639"},
+    {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ef92b1fbe6aa2e7885eb90853cc016b1fc95439a8cc8da6d526880e9e2148695"},
+    {file = "rpds_py-0.22.1-cp39-cp39-win32.whl", hash = "sha256:c88535f83f7391cf3a45af990237e3939a6fdfbedaed2571633bfdd0bceb36b0"},
+    {file = "rpds_py-0.22.1-cp39-cp39-win_amd64.whl", hash = "sha256:7839b7528faa4d134c183b1f2dd1ee4dc2ca2f899f4f0cfdf00fc04c255262a7"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a0ed14a4162c2c2b21a162c9fcf90057e3e7da18cd171ab344c1e1664f75090e"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:05fdeae9010533e47715c37df83264df0122584e40d691d50cf3607c060952a3"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4659b2e4a5008715099e216050f5c6976e5a4329482664411789968b82e3f17d"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a18aedc032d6468b73ebbe4437129cb30d54fe543cde2f23671ecad76c3aea24"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149b4d875ef9b12a8f5e303e86a32a58f8ef627e57ec97a7d0e4be819069d141"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdaee3947eaaa52dae3ceb9d9f66329e13d8bae35682b1e5dd54612938693934"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36ce951800ed2acc6772fd9f42150f29d567f0423989748052fdb39d9e2b5795"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ab784621d3e2a41916e21f13a483602cc989fd45fff637634b9231ba43d4383b"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c2a214bf5b79bd39a9de1c991353aaaacafda83ba1374178309e92be8e67d411"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:85060e96953647871957d41707adb8d7bff4e977042fd0deb4fc1881b98dd2fe"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c6f3fd617db422c9d4e12cb8d84c984fe07d6d9cb0950cbf117f3bccc6268d05"},
+    {file = "rpds_py-0.22.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f2d1b58a0c3a73f0361759642e80260a6d28eee6501b40fe25b82af33ef83f21"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:76eaa4c087a061a2c8a0a92536405069878a8f530c00e84a9eaf332e70f5561f"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:959ae04ed30cde606f3a0320f0a1f4167a107e685ef5209cce28c5080590bd31"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:198067aa6f3d942ff5d0d655bb1e91b59ae85279d47590682cba2834ac1b97d2"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3e7e99e2af59c56c59b6c964d612511b8203480d39d1ef83edc56f2cb42a3f5d"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0545928bdf53dfdfcab284468212efefb8a6608ca3b6910c7fb2e5ed8bdc2dc0"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef7282d8a14b60dd515e47060638687710b1d518f4b5e961caad43fb3a3606f9"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe3f245c2f39a5692d9123c174bc48f6f9fe3e96407e67c6d04541a767d99e72"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efb2ad60ca8637d5f9f653f9a9a8d73964059972b6b95036be77e028bffc68a3"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d8306f27418361b788e3fca9f47dec125457f80122e7e31ba7ff5cdba98343f8"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4c8dc7331e8cbb1c0ea2bcb550adb1777365944ffd125c69aa1117fdef4887f5"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:776a06cb5720556a549829896a49acebb5bdd96c7bba100191a994053546975a"},
+    {file = "rpds_py-0.22.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e4f91d702b9ce1388660b3d4a28aa552614a1399e93f718ed0dacd68f23b3d32"},
+    {file = "rpds_py-0.22.1.tar.gz", hash = "sha256:157a023bded0618a1eea54979fe2e0f9309e9ddc818ef4b8fc3b884ff38fedd5"},
 ]

 [[package]]
@ -7647,4 +7644,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "33ee730cf750e618ec005ad44ad09617bc8f95632b30ac02b5290a03a33bdf5b"
+content-hash = "0d9d498f50601c95a8616797441f00597acdea1e6a70d3b9642c17ffacc1bb45"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -26,9 +26,10 @@ packages = [{include = "docling"}]
 ######################
 python = "^3.9"
 pydantic = ">=2.0.0,<2.10"
-docling-core = "^2.6.1"
-docling-ibm-models = "^2.0.6"
-deepsearch-glm = "^0.26.1"
+docling-core = { git = "ssh://git@github.com/DS4SD/docling-core.git", branch = "feat-add-legacy-convert" }
+docling-ibm-models = { git = "ssh://git@github.com/DS4SD/docling-ibm-models.git", branch = "nli/performance" }
+deepsearch-glm = { git = "ssh://git@github.com/DS4SD/deepsearch-glm.git", branch = "cau/layout-processing-children-payloads" }
+docling-parse = { git = "ssh://git@github.com/DS4SD/docling-parse.git", branch = "dev/expose-cell-sanitisation-via-python" }
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"
@ -36,7 +37,6 @@ huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = "^1.7"
 tesserocr = { version = "^2.7.1", optional = true }
-docling-parse = "^2.0.5"
 certifi = ">=2024.7.4"
 rtree = "^1.3.0"
 scipy = "^1.6.0"