mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat(experimental): Add experimental TableCropsLayoutModel (#2669)
* feat: Scaffolding for layout and table model plugin factory Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add missing files Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add base options classes for layout and table Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * feat(experimental): Add experimental TableCropsLayoutModel Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add example Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -71,6 +71,7 @@ from docling.datamodel.pipeline_options import (
|
||||
PipelineOptions,
|
||||
ProcessingPipeline,
|
||||
TableFormerMode,
|
||||
TableStructureOptions,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
VlmPipelineOptions,
|
||||
@@ -645,6 +646,9 @@ def convert( # noqa: C901
|
||||
do_picture_classification=enrich_picture_classes,
|
||||
document_timeout=document_timeout,
|
||||
)
|
||||
if isinstance(
|
||||
pipeline_options.table_structure_options, TableStructureOptions
|
||||
):
|
||||
pipeline_options.table_structure_options.do_cell_matching = (
|
||||
True # do_cell_matching
|
||||
)
|
||||
|
||||
@@ -354,9 +354,9 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
|
||||
)
|
||||
# If True, text from backend will be used instead of generated text
|
||||
|
||||
table_structure_options: TableStructureOptions = TableStructureOptions()
|
||||
table_structure_options: BaseTableStructureOptions = TableStructureOptions()
|
||||
ocr_options: OcrOptions = OcrAutoOptions()
|
||||
layout_options: LayoutOptions = LayoutOptions()
|
||||
layout_options: BaseLayoutOptions = LayoutOptions()
|
||||
|
||||
images_scale: float = 1.0
|
||||
generate_page_images: bool = False
|
||||
|
||||
13
docling/experimental/datamodel/table_crops_layout_options.py
Normal file
13
docling/experimental/datamodel/table_crops_layout_options.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Internal options for the experimental TableCrops layout model."""
|
||||
|
||||
from typing import ClassVar
|
||||
|
||||
from docling.datamodel.pipeline_options import BaseLayoutOptions
|
||||
|
||||
__all__ = ["TableCropsLayoutOptions"]
|
||||
|
||||
|
||||
class TableCropsLayoutOptions(BaseLayoutOptions):
    """Configuration object for the experimental ``TableCropsLayoutModel``.

    The class declares no fields of its own beyond what ``BaseLayoutOptions``
    provides; it only sets the ``kind`` identifier. Internal-only, not part
    of the stable public interface.
    """

    # Identifier string for this options class.
    # NOTE(review): presumably used to look up the matching layout model in
    # the plugin factory -- confirm against the factory implementation.
    kind: ClassVar[str] = "docling_experimental_table_crops_layout"
|
||||
3
docling/experimental/models/__init__.py
Normal file
3
docling/experimental/models/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Experimental models for Docling."""
|
||||
|
||||
__all__: list[str] = []
|
||||
114
docling/experimental/models/table_crops_layout_model.py
Normal file
114
docling/experimental/models/table_crops_layout_model.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""Internal TableCrops layout model that marks full pages as table clusters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import DocItemLabel
|
||||
|
||||
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.experimental.datamodel.table_crops_layout_options import (
|
||||
TableCropsLayoutOptions,
|
||||
)
|
||||
from docling.models.base_layout_model import BaseLayoutModel
|
||||
|
||||
__all__ = ["TableCropsLayoutModel"]
|
||||
|
||||
|
||||
class TableCropsLayoutModel(BaseLayoutModel):
    """Experimental layout model that labels each whole page as one table.

    Meant for pipelines that are fed images containing nothing but a cropped
    table: instead of detecting layout regions, every page gets a single
    ``TABLE`` cluster spanning the full page.

    This model is internal and not part of the stable public interface.
    """

    def __init__(
        self,
        artifacts_path: Optional[Path],
        accelerator_options: AcceleratorOptions,
        options: TableCropsLayoutOptions,
    ):
        """Store the constructor arguments; nothing else is done here."""
        self.artifacts_path = artifacts_path
        self.accelerator_options = accelerator_options
        self.options = options

    @classmethod
    def get_options_type(cls) -> type[TableCropsLayoutOptions]:
        """Return the options class this model is configured with."""
        return TableCropsLayoutOptions

    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Build one ``LayoutPrediction`` per page, in input order.

        Pages without a valid backend keep their existing layout prediction
        (or get an empty one) and their confidence scores are left untouched.
        All other pages receive the full-page table cluster and have their
        confidence scores written into ``conv_res``.
        """
        results: list[LayoutPrediction] = []

        for page in pages:
            backend = page._backend
            if backend is not None and backend.is_valid():
                page_clusters = self._build_page_clusters(page)
                page_prediction = LayoutPrediction(clusters=page_clusters)
                self._update_confidence(conv_res, page, page_clusters)
                results.append(page_prediction)
            else:
                # Unusable page: pass through whatever prediction it has.
                results.append(page.predictions.layout or LayoutPrediction())

        return results

    def _build_page_clusters(self, page: Page) -> list[Cluster]:
        """Return the cluster list for ``page``: one full-page table or nothing.

        An empty list is returned when the page has no size, or when cell
        assignment is enabled, the page has no cells, and
        ``keep_empty_clusters`` is off.
        """
        size = page.size
        if size is None:
            return []

        full_page_table = Cluster(
            id=0,
            label=DocItemLabel.TABLE,
            bbox=BoundingBox(l=0.0, t=0.0, r=size.width, b=size.height),
            confidence=1.0,
            cells=[],
        )

        if self.options.skip_cell_assignment:
            # Cells are deliberately left empty and the cluster is kept.
            return [full_page_table]

        cells = list(page.cells)
        full_page_table.cells = cells

        if cells or self.options.keep_empty_clusters:
            return [full_page_table]
        return []

    def _update_confidence(
        self, conv_res: ConversionResult, page: Page, clusters: list[Cluster]
    ) -> None:
        """Write the layout and OCR confidence scores for ``page``.

        The layout score is fixed at 1.0. The OCR score is the mean confidence
        of the page cells flagged ``from_ocr``. ``clusters`` is currently not
        read by this method.
        """
        with warnings.catch_warnings():
            # Silence numpy's RuntimeWarnings for a mean over an empty list
            # (no OCR cells on the page); presumably the score is then NaN.
            warnings.filterwarnings(
                "ignore",
                message="Mean of empty slice|invalid value encountered in scalar divide",
                category=RuntimeWarning,
                module="numpy",
            )

            conv_res.confidence.pages[page.page_no].layout_score = 1.0

            ocr_confidences = [
                cell.confidence for cell in page.cells if cell.from_ocr
            ]
            conv_res.confidence.pages[page.page_no].ocr_score = float(
                np.mean(ocr_confidences)
            )
|
||||
@@ -31,11 +31,15 @@ def picture_description():
|
||||
|
||||
|
||||
def layout_engines():
    """Return the layout model classes under the ``"layout_engines"`` key.

    The model modules are imported inside the function body, so they are
    only loaded when this function is actually called.
    """
    from docling.experimental.models.table_crops_layout_model import (
        TableCropsLayoutModel,
    )
    from docling.models.layout_model import LayoutModel

    engines = [LayoutModel, TableCropsLayoutModel]
    return {"layout_engines": engines}
|
||||
|
||||
|
||||
40
docs/examples/experimental/process_table_crops.py
vendored
Normal file
40
docs/examples/experimental/process_table_crops.py
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
"""Run Docling on an image using the experimental TableCrops layout model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import docling
|
||||
from docling.datamodel.document import InputFormat
|
||||
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
|
||||
from docling.document_converter import DocumentConverter, ImageFormatOption
|
||||
from docling.experimental.datamodel.table_crops_layout_options import (
|
||||
TableCropsLayoutOptions,
|
||||
)
|
||||
from docling.experimental.models.table_crops_layout_model import TableCropsLayoutModel
|
||||
from docling.models.factories import get_layout_factory
|
||||
|
||||
|
||||
def main() -> None:
    """Convert a sample table-crop image and print its first table as Markdown.

    The pipeline is configured with ``TableCropsLayoutOptions`` as layout
    options, with table structure recognition and page image generation
    switched on, and the converter is restricted to image input.

    The sample path is relative, so the script is presumably meant to be run
    from the repository root.

    Raises:
        SystemExit: If the conversion result contains no tables.
    """
    sample_image = "tests/data/2305.03393v1-table_crop.png"

    pipeline_options = ThreadedPdfPipelineOptions(
        layout_options=TableCropsLayoutOptions(),
        do_table_structure=True,
        generate_page_images=True,
    )

    converter = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE],
        format_options={
            InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options)
        },
    )

    conv_res = converter.convert(sample_image)

    # Guard the index access: an empty table list would otherwise surface as
    # a bare IndexError with no hint about what went wrong.
    tables = conv_res.document.tables
    if not tables:
        raise SystemExit(f"No tables found in {sample_image}")

    print(tables[0].export_to_markdown())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
BIN
tests/data/2305.03393v1-table_crop.png
vendored
Normal file
BIN
tests/data/2305.03393v1-table_crop.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
Reference in New Issue
Block a user