feat(experimental): Add experimental TableCropsLayoutModel (#2669)

* feat: Scaffolding for layout and table model plugin factory

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add missing files

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add base options classes for layout and table

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* feat(experimental): Add experimental TableCropsLayoutModel

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add example

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-11-25 05:14:51 +01:00
committed by GitHub
parent b75c6461f4
commit 134436245a
8 changed files with 184 additions and 6 deletions

View File

@@ -71,6 +71,7 @@ from docling.datamodel.pipeline_options import (
PipelineOptions,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
VlmPipelineOptions,
@@ -645,10 +646,13 @@ def convert( # noqa: C901
do_picture_classification=enrich_picture_classes,
document_timeout=document_timeout,
)
pipeline_options.table_structure_options.do_cell_matching = (
True # do_cell_matching
)
pipeline_options.table_structure_options.mode = table_mode
if isinstance(
pipeline_options.table_structure_options, TableStructureOptions
):
pipeline_options.table_structure_options.do_cell_matching = (
True # do_cell_matching
)
pipeline_options.table_structure_options.mode = table_mode
if image_export_mode != ImageRefMode.PLACEHOLDER:
pipeline_options.generate_page_images = True

View File

@@ -354,9 +354,9 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
)
# If True, text from backend will be used instead of generated text
table_structure_options: TableStructureOptions = TableStructureOptions()
table_structure_options: BaseTableStructureOptions = TableStructureOptions()
ocr_options: OcrOptions = OcrAutoOptions()
layout_options: LayoutOptions = LayoutOptions()
layout_options: BaseLayoutOptions = LayoutOptions()
images_scale: float = 1.0
generate_page_images: bool = False

View File

@@ -0,0 +1,13 @@
"""Internal options for the experimental TableCrops layout model."""
from typing import ClassVar
from docling.datamodel.pipeline_options import BaseLayoutOptions
__all__ = ["TableCropsLayoutOptions"]
class TableCropsLayoutOptions(BaseLayoutOptions):
    """Layout options that select the experimental TableCrops layout model.

    Internal-only: this class is not part of the stable public interface.
    """

    # Identifier string for this options type.
    # NOTE(review): presumably the key used to match these options to
    # TableCropsLayoutModel in the layout factory -- confirm.
    kind: ClassVar[str] = "docling_experimental_table_crops_layout"

View File

@@ -0,0 +1,3 @@
"""Experimental models for Docling."""
__all__: list[str] = []

View File

@@ -0,0 +1,114 @@
"""Internal TableCrops layout model that marks full pages as table clusters."""
from __future__ import annotations
import warnings
from collections.abc import Sequence
from pathlib import Path
from typing import Optional
import numpy as np
from docling_core.types.doc import DocItemLabel
from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
from docling.datamodel.document import ConversionResult
from docling.experimental.datamodel.table_crops_layout_options import (
TableCropsLayoutOptions,
)
from docling.models.base_layout_model import BaseLayoutModel
__all__ = ["TableCropsLayoutModel"]
class TableCropsLayoutModel(BaseLayoutModel):
    """Experimental layout model emitting one table cluster spanning each page.

    Intended for pipelines that are fed images which are already table crops.
    Internal-only: this model is not part of the stable public interface.
    """

    def __init__(
        self,
        artifacts_path: Optional[Path],
        accelerator_options: AcceleratorOptions,
        options: TableCropsLayoutOptions,
    ):
        """Store the constructor arguments on the instance; nothing is loaded."""
        self.artifacts_path = artifacts_path
        self.accelerator_options = accelerator_options
        self.options = options

    @classmethod
    def get_options_type(cls) -> type[TableCropsLayoutOptions]:
        """Return the options class this model is configured with."""
        return TableCropsLayoutOptions

    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Produce one layout prediction per page, in input order.

        Pages with a missing or invalid backend keep their existing layout
        prediction (or get an empty one). All other pages get a prediction
        built by ``_build_page_clusters`` and have their confidence scores
        written into ``conv_res``.
        """
        results: list[LayoutPrediction] = []
        for page in pages:
            has_usable_backend = (
                page._backend is not None and page._backend.is_valid()
            )
            if has_usable_backend:
                page_clusters = self._build_page_clusters(page)
                page_prediction = LayoutPrediction(clusters=page_clusters)
                self._update_confidence(conv_res, page, page_clusters)
            else:
                # Nothing to analyse: pass through what is already there.
                page_prediction = page.predictions.layout or LayoutPrediction()
            results.append(page_prediction)
        return results

    def _build_page_clusters(self, page: Page) -> list[Cluster]:
        """Build the cluster list for ``page``.

        Returns an empty list when the page has no size. Otherwise returns a
        single TABLE cluster (id 0, confidence 1.0) whose bounding box runs
        from (0, 0) to (page width, page height). Unless cell assignment is
        skipped, all page cells are attached to that cluster, and the cluster
        is dropped when it ends up without cells and empty clusters are not
        kept.
        """
        size = page.size
        if size is None:
            return []

        full_page_table = Cluster(
            id=0,
            label=DocItemLabel.TABLE,
            bbox=BoundingBox(l=0.0, t=0.0, r=size.width, b=size.height),
            confidence=1.0,
            cells=[],
        )

        if self.options.skip_cell_assignment:
            # Cells stay unassigned and the cluster is always kept.
            return [full_page_table]

        assigned_cells = list(page.cells)
        full_page_table.cells = assigned_cells
        if assigned_cells or self.options.keep_empty_clusters:
            return [full_page_table]
        return []

    def _update_confidence(
        self, conv_res: ConversionResult, page: Page, clusters: list[Cluster]
    ) -> None:
        """Write layout and OCR confidence scores for ``page`` into ``conv_res``.

        The layout score is always 1.0; the OCR score is the mean confidence
        of the page cells flagged ``from_ocr``. ``clusters`` is accepted but
        not used.
        """
        with warnings.catch_warnings():
            # np.mean over an empty list yields NaN and emits a RuntimeWarning;
            # silence it so pages without OCR cells simply get a NaN score.
            warnings.filterwarnings(
                "ignore",
                "Mean of empty slice|invalid value encountered in scalar divide",
                RuntimeWarning,
                "numpy",
            )

            conv_res.confidence.pages[page.page_no].layout_score = 1.0

            ocr_confidences = [
                cell.confidence for cell in page.cells if cell.from_ocr
            ]
            conv_res.confidence.pages[page.page_no].ocr_score = float(
                np.mean(ocr_confidences)
            )

View File

@@ -31,11 +31,15 @@ def picture_description():
def layout_engines():
    """Return the layout model classes under the ``"layout_engines"`` key.

    The model modules are imported when this function is called rather than
    at module import time.
    """
    from docling.experimental.models.table_crops_layout_model import (
        TableCropsLayoutModel,
    )
    from docling.models.layout_model import LayoutModel

    # Order is preserved: the default model first, then the experimental one.
    engines = [LayoutModel, TableCropsLayoutModel]
    return {"layout_engines": engines}

View File

@@ -0,0 +1,40 @@
"""Run Docling on an image using the experimental TableCrops layout model."""
from __future__ import annotations
from pathlib import Path
import docling
from docling.datamodel.document import InputFormat
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
from docling.document_converter import DocumentConverter, ImageFormatOption
from docling.experimental.datamodel.table_crops_layout_options import (
TableCropsLayoutOptions,
)
from docling.experimental.models.table_crops_layout_model import TableCropsLayoutModel
from docling.models.factories import get_layout_factory
def main() -> None:
    """Convert the sample table-crop image and print its first table as Markdown.

    The pipeline uses ``TableCropsLayoutOptions`` as layout options, with
    table structure recognition and page-image generation switched on.
    The sample path is relative, so run this from a working directory where
    ``tests/data/2305.03393v1-table_crop.png`` resolves.
    """
    sample_image = "tests/data/2305.03393v1-table_crop.png"

    pipeline_options = ThreadedPdfPipelineOptions(
        layout_options=TableCropsLayoutOptions(),
        do_table_structure=True,
        generate_page_images=True,
    )

    converter = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE],
        format_options={
            InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options)
        },
    )

    conv_res = converter.convert(sample_image)
    document = conv_res.document

    # Guard against an empty result instead of failing with a bare IndexError.
    if not document.tables:
        print(f"No tables were found in {sample_image}")
        return

    # Pass the owning document explicitly; the doc-less call form is
    # deprecated in docling-core.
    print(document.tables[0].export_to_markdown(doc=document))


if __name__ == "__main__":
    main()

BIN
tests/data/2305.03393v1-table_crop.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB