diff --git a/docling/cli/main.py b/docling/cli/main.py index 63324645..9dddadba 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -71,6 +71,7 @@ from docling.datamodel.pipeline_options import ( PipelineOptions, ProcessingPipeline, TableFormerMode, + TableStructureOptions, TesseractCliOcrOptions, TesseractOcrOptions, VlmPipelineOptions, @@ -645,10 +646,13 @@ def convert( # noqa: C901 do_picture_classification=enrich_picture_classes, document_timeout=document_timeout, ) - pipeline_options.table_structure_options.do_cell_matching = ( - True # do_cell_matching - ) - pipeline_options.table_structure_options.mode = table_mode + if isinstance( + pipeline_options.table_structure_options, TableStructureOptions + ): + pipeline_options.table_structure_options.do_cell_matching = ( + True # do_cell_matching + ) + pipeline_options.table_structure_options.mode = table_mode if image_export_mode != ImageRefMode.PLACEHOLDER: pipeline_options.generate_page_images = True diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 052b5621..30d4e50c 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -354,9 +354,9 @@ class PdfPipelineOptions(PaginatedPipelineOptions): ) # If True, text from backend will be used instead of generated text - table_structure_options: TableStructureOptions = TableStructureOptions() + table_structure_options: BaseTableStructureOptions = TableStructureOptions() ocr_options: OcrOptions = OcrAutoOptions() - layout_options: LayoutOptions = LayoutOptions() + layout_options: BaseLayoutOptions = LayoutOptions() images_scale: float = 1.0 generate_page_images: bool = False diff --git a/docling/experimental/datamodel/table_crops_layout_options.py b/docling/experimental/datamodel/table_crops_layout_options.py new file mode 100644 index 00000000..7e4cfaec --- /dev/null +++ b/docling/experimental/datamodel/table_crops_layout_options.py @@ -0,0 +1,13 @@ +"""Internal options for the experimental TableCrops layout model.""" + +from typing import ClassVar + +from docling.datamodel.pipeline_options import BaseLayoutOptions + +__all__ = ["TableCropsLayoutOptions"] + + +class TableCropsLayoutOptions(BaseLayoutOptions): + """Options for TableCropsLayoutModel (internal-only).""" + + kind: ClassVar[str] = "docling_experimental_table_crops_layout" diff --git a/docling/experimental/models/__init__.py b/docling/experimental/models/__init__.py new file mode 100644 index 00000000..4c1c1db2 --- /dev/null +++ b/docling/experimental/models/__init__.py @@ -0,0 +1,3 @@ +"""Experimental models for Docling.""" + +__all__: list[str] = [] diff --git a/docling/experimental/models/table_crops_layout_model.py b/docling/experimental/models/table_crops_layout_model.py new file mode 100644 index 00000000..726956c8 --- /dev/null +++ b/docling/experimental/models/table_crops_layout_model.py @@ -0,0 +1,114 @@ +"""Internal TableCrops layout model that marks full pages as table clusters.""" + +from __future__ import annotations + +import warnings +from collections.abc import Sequence +from pathlib import Path +from typing import Optional + +import numpy as np +from docling_core.types.doc import DocItemLabel + +from docling.datamodel.accelerator_options import AcceleratorOptions +from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page +from docling.datamodel.document import ConversionResult +from docling.experimental.datamodel.table_crops_layout_options import ( + TableCropsLayoutOptions, +) +from docling.models.base_layout_model import BaseLayoutModel + +__all__ = ["TableCropsLayoutModel"] + + +class TableCropsLayoutModel(BaseLayoutModel): + """Experimental layout model that treats the full page as a table cluster. + This is useful in cases where a Docling pipeline is applied to images of table crops only. + + This model is internal and not part of the stable public interface. + """ + + def __init__( + self, + artifacts_path: Optional[Path], + accelerator_options: AcceleratorOptions, + options: TableCropsLayoutOptions, + ): + self.options = options + self.artifacts_path = artifacts_path + self.accelerator_options = accelerator_options + + @classmethod + def get_options_type(cls) -> type[TableCropsLayoutOptions]: + return TableCropsLayoutOptions + + def predict_layout( + self, + conv_res: ConversionResult, + pages: Sequence[Page], + ) -> Sequence[LayoutPrediction]: + layout_predictions: list[LayoutPrediction] = [] + + for page in pages: + if page._backend is None or not page._backend.is_valid(): + existing_prediction = page.predictions.layout or LayoutPrediction() + layout_predictions.append(existing_prediction) + continue + + clusters = self._build_page_clusters(page) + prediction = LayoutPrediction(clusters=clusters) + + self._update_confidence(conv_res, page, clusters) + + layout_predictions.append(prediction) + + return layout_predictions + + def _build_page_clusters(self, page: Page) -> list[Cluster]: + page_size = page.size + if page_size is None: + return [] + + bbox = BoundingBox( + l=0.0, + t=0.0, + r=page_size.width, + b=page_size.height, + ) + + cluster = Cluster( + id=0, + label=DocItemLabel.TABLE, + bbox=bbox, + confidence=1.0, + cells=[], + ) + + clusters = [cluster] + + if not self.options.skip_cell_assignment: + page_cells = list(page.cells) + cluster.cells = page_cells + + if not page_cells and not self.options.keep_empty_clusters: + clusters = [] + + return clusters + + def _update_confidence( + self, conv_res: ConversionResult, page: Page, clusters: list[Cluster] + ) -> None: + """Populate layout and OCR confidence scores for the page.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Mean of empty slice|invalid value encountered in scalar divide", + RuntimeWarning, + "numpy", + ) + + conv_res.confidence.pages[page.page_no].layout_score = 1.0 + + ocr_cells = [cell for cell in page.cells if cell.from_ocr] + ocr_confidence = float(np.mean([cell.confidence for cell in ocr_cells])) + conv_res.confidence.pages[page.page_no].ocr_score = ocr_confidence diff --git a/docling/models/plugins/defaults.py b/docling/models/plugins/defaults.py index 06b87080..11c8d7b3 100644 --- a/docling/models/plugins/defaults.py +++ b/docling/models/plugins/defaults.py @@ -31,11 +31,15 @@ def picture_description(): def layout_engines(): + from docling.experimental.models.table_crops_layout_model import ( + TableCropsLayoutModel, + ) from docling.models.layout_model import LayoutModel return { "layout_engines": [ LayoutModel, + TableCropsLayoutModel, ] } diff --git a/docs/examples/experimental/process_table_crops.py b/docs/examples/experimental/process_table_crops.py new file mode 100644 index 00000000..90c87db9 --- /dev/null +++ b/docs/examples/experimental/process_table_crops.py @@ -0,0 +1,40 @@ +"""Run Docling on an image using the experimental TableCrops layout model.""" + +from __future__ import annotations + +from pathlib import Path + +import docling +from docling.datamodel.document import InputFormat +from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions +from docling.document_converter import DocumentConverter, ImageFormatOption +from docling.experimental.datamodel.table_crops_layout_options import ( + TableCropsLayoutOptions, +) +from docling.experimental.models.table_crops_layout_model import TableCropsLayoutModel +from docling.models.factories import get_layout_factory + + +def main() -> None: + sample_image = "tests/data/2305.03393v1-table_crop.png" + + pipeline_options = ThreadedPdfPipelineOptions( + layout_options=TableCropsLayoutOptions(), + do_table_structure=True, + generate_page_images=True, + ) + + converter = DocumentConverter( + allowed_formats=[InputFormat.IMAGE], + format_options={ + InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options) + }, + ) + + conv_res = converter.convert(sample_image) + + print(conv_res.document.tables[0].export_to_markdown()) + + +if __name__ == "__main__": + main() diff --git a/tests/data/2305.03393v1-table_crop.png b/tests/data/2305.03393v1-table_crop.png new file mode 100644 index 00000000..a29e7236 Binary files /dev/null and b/tests/data/2305.03393v1-table_crop.png differ