mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat(experimental): Add experimental TableCropsLayoutModel (#2669)
* feat: Scaffolding for layout and table model plugin factory
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add missing files
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add base options classes for layout and table
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* feat(experimental): Add experimental TableCropsLayoutModel
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add example
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -71,6 +71,7 @@ from docling.datamodel.pipeline_options import (
|
|||||||
PipelineOptions,
|
PipelineOptions,
|
||||||
ProcessingPipeline,
|
ProcessingPipeline,
|
||||||
TableFormerMode,
|
TableFormerMode,
|
||||||
|
TableStructureOptions,
|
||||||
TesseractCliOcrOptions,
|
TesseractCliOcrOptions,
|
||||||
TesseractOcrOptions,
|
TesseractOcrOptions,
|
||||||
VlmPipelineOptions,
|
VlmPipelineOptions,
|
||||||
@@ -645,10 +646,13 @@ def convert( # noqa: C901
|
|||||||
do_picture_classification=enrich_picture_classes,
|
do_picture_classification=enrich_picture_classes,
|
||||||
document_timeout=document_timeout,
|
document_timeout=document_timeout,
|
||||||
)
|
)
|
||||||
pipeline_options.table_structure_options.do_cell_matching = (
|
if isinstance(
|
||||||
True # do_cell_matching
|
pipeline_options.table_structure_options, TableStructureOptions
|
||||||
)
|
):
|
||||||
pipeline_options.table_structure_options.mode = table_mode
|
pipeline_options.table_structure_options.do_cell_matching = (
|
||||||
|
True # do_cell_matching
|
||||||
|
)
|
||||||
|
pipeline_options.table_structure_options.mode = table_mode
|
||||||
|
|
||||||
if image_export_mode != ImageRefMode.PLACEHOLDER:
|
if image_export_mode != ImageRefMode.PLACEHOLDER:
|
||||||
pipeline_options.generate_page_images = True
|
pipeline_options.generate_page_images = True
|
||||||
|
|||||||
@@ -354,9 +354,9 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
|
|||||||
)
|
)
|
||||||
# If True, text from backend will be used instead of generated text
|
# If True, text from backend will be used instead of generated text
|
||||||
|
|
||||||
table_structure_options: TableStructureOptions = TableStructureOptions()
|
table_structure_options: BaseTableStructureOptions = TableStructureOptions()
|
||||||
ocr_options: OcrOptions = OcrAutoOptions()
|
ocr_options: OcrOptions = OcrAutoOptions()
|
||||||
layout_options: LayoutOptions = LayoutOptions()
|
layout_options: BaseLayoutOptions = LayoutOptions()
|
||||||
|
|
||||||
images_scale: float = 1.0
|
images_scale: float = 1.0
|
||||||
generate_page_images: bool = False
|
generate_page_images: bool = False
|
||||||
|
|||||||
13
docling/experimental/datamodel/table_crops_layout_options.py
Normal file
13
docling/experimental/datamodel/table_crops_layout_options.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
"""Internal options for the experimental TableCrops layout model."""
|
||||||
|
|
||||||
|
from typing import ClassVar
|
||||||
|
|
||||||
|
from docling.datamodel.pipeline_options import BaseLayoutOptions
|
||||||
|
|
||||||
|
__all__ = ["TableCropsLayoutOptions"]
|
||||||
|
|
||||||
|
|
||||||
|
class TableCropsLayoutOptions(BaseLayoutOptions):
    """Configuration for the experimental TableCropsLayoutModel (internal-only).

    Adds no fields of its own; everything besides ``kind`` comes from
    ``BaseLayoutOptions``.
    """

    # Identifier for this options type.
    # NOTE(review): presumably the key the layout factory uses to pair these
    # options with their model -- confirm against the factory code.
    kind: ClassVar[str] = "docling_experimental_table_crops_layout"
|
||||||
3
docling/experimental/models/__init__.py
Normal file
3
docling/experimental/models/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Experimental models for Docling."""
|
||||||
|
|
||||||
|
__all__: list[str] = []
|
||||||
114
docling/experimental/models/table_crops_layout_model.py
Normal file
114
docling/experimental/models/table_crops_layout_model.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
"""Internal TableCrops layout model that marks full pages as table clusters."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
from collections.abc import Sequence
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from docling_core.types.doc import DocItemLabel
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
|
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.experimental.datamodel.table_crops_layout_options import (
|
||||||
|
TableCropsLayoutOptions,
|
||||||
|
)
|
||||||
|
from docling.models.base_layout_model import BaseLayoutModel
|
||||||
|
|
||||||
|
__all__ = ["TableCropsLayoutModel"]
|
||||||
|
|
||||||
|
|
||||||
|
class TableCropsLayoutModel(BaseLayoutModel):
    """Experimental layout model that labels each whole page as one table.

    Meant for pipelines that are fed images already cropped to a table, so
    the full page extent is emitted as a single TABLE cluster.

    This model is internal and not part of the stable public interface.
    """

    def __init__(
        self,
        artifacts_path: Optional[Path],
        accelerator_options: AcceleratorOptions,
        options: TableCropsLayoutOptions,
    ):
        """Store the supplied configuration on the instance.

        Args:
            artifacts_path: Kept as ``self.artifacts_path``; not read by this class.
            accelerator_options: Kept as ``self.accelerator_options``; not read
                by this class.
            options: Layout options consulted when building page clusters.
        """
        self.artifacts_path = artifacts_path
        self.accelerator_options = accelerator_options
        self.options = options

    @classmethod
    def get_options_type(cls) -> type[TableCropsLayoutOptions]:
        """Return the options class that configures this model."""
        return TableCropsLayoutOptions

    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Produce one layout prediction per page, in input order.

        Pages without a valid backend are passed through with whatever layout
        prediction they already carry (or an empty one). For every other page
        a full-page table prediction is built and the confidence scores in
        ``conv_res`` are updated.

        Args:
            conv_res: Conversion result whose per-page confidence is updated.
            pages: Pages to process.

        Returns:
            A list with exactly one ``LayoutPrediction`` for each input page.
        """
        results: list[LayoutPrediction] = []

        for page in pages:
            backend = page._backend
            if backend is not None and backend.is_valid():
                page_clusters = self._build_page_clusters(page)
                page_prediction = LayoutPrediction(clusters=page_clusters)
                self._update_confidence(conv_res, page, page_clusters)
            else:
                # Unusable page: keep the existing prediction, if any.
                page_prediction = page.predictions.layout or LayoutPrediction()

            results.append(page_prediction)

        return results

    def _build_page_clusters(self, page: Page) -> list[Cluster]:
        """Return the clusters for ``page``: a single full-page table, or none.

        The result is empty when the page has no size, or when cell assignment
        is enabled, the page has no cells, and empty clusters are not kept.
        """
        size = page.size
        if size is None:
            return []

        full_page = Cluster(
            id=0,
            label=DocItemLabel.TABLE,
            bbox=BoundingBox(l=0.0, t=0.0, r=size.width, b=size.height),
            confidence=1.0,
            cells=[],
        )

        if self.options.skip_cell_assignment:
            # The cluster is returned with its cell list left empty.
            return [full_page]

        assigned_cells = list(page.cells)
        full_page.cells = assigned_cells

        if assigned_cells or self.options.keep_empty_clusters:
            return [full_page]
        return []

    def _update_confidence(
        self, conv_res: ConversionResult, page: Page, clusters: list[Cluster]
    ) -> None:
        """Populate layout and OCR confidence scores for the page.

        The layout score is always 1.0. The OCR score is the mean confidence
        of the page cells flagged ``from_ocr``; without such cells the mean of
        an empty list is NaN, and the matching NumPy warnings are silenced.
        ``clusters`` is accepted but not used.
        """
        ignored_messages = (
            "Mean of empty slice|invalid value encountered in scalar divide"
        )

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                ignored_messages,
                RuntimeWarning,
                "numpy",
            )

            page_scores = conv_res.confidence.pages[page.page_no]
            page_scores.layout_score = 1.0

            ocr_confidences = [
                cell.confidence for cell in page.cells if cell.from_ocr
            ]
            page_scores.ocr_score = float(np.mean(ocr_confidences))
|
||||||
@@ -31,11 +31,15 @@ def picture_description():
|
|||||||
|
|
||||||
|
|
||||||
def layout_engines():
    """Return the available layout model classes under ``"layout_engines"``.

    The model modules are imported when this function is called rather than
    when the enclosing module is imported.
    """
    from docling.experimental.models.table_crops_layout_model import (
        TableCropsLayoutModel,
    )
    from docling.models.layout_model import LayoutModel

    engines = [LayoutModel, TableCropsLayoutModel]
    return {"layout_engines": engines}
|
||||||
|
|
||||||
|
|||||||
40
docs/examples/experimental/process_table_crops.py
vendored
Normal file
40
docs/examples/experimental/process_table_crops.py
vendored
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
"""Run Docling on an image using the experimental TableCrops layout model."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import docling
|
||||||
|
from docling.datamodel.document import InputFormat
|
||||||
|
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
|
||||||
|
from docling.document_converter import DocumentConverter, ImageFormatOption
|
||||||
|
from docling.experimental.datamodel.table_crops_layout_options import (
|
||||||
|
TableCropsLayoutOptions,
|
||||||
|
)
|
||||||
|
from docling.experimental.models.table_crops_layout_model import TableCropsLayoutModel
|
||||||
|
from docling.models.factories import get_layout_factory
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Convert the sample table-crop image and print its table as Markdown.

    The image goes through a threaded PDF pipeline whose layout stage is the
    experimental TableCrops model, with table-structure recognition enabled.
    If the conversion yields no table, a short notice is printed instead of
    failing with an ``IndexError``.
    """
    # Relative path: run the example from the repository root.
    sample_image = "tests/data/2305.03393v1-table_crop.png"

    pipeline_options = ThreadedPdfPipelineOptions(
        layout_options=TableCropsLayoutOptions(),
        do_table_structure=True,
        generate_page_images=True,
    )

    converter = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE],
        format_options={
            InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options)
        },
    )

    conv_res = converter.convert(sample_image)
    document = conv_res.document

    # The layout model may emit no table cluster (e.g. a page without text
    # cells), so guard before indexing the table list.
    if not document.tables:
        print(f"No table was produced for {sample_image}")
        return

    # Pass the owning document explicitly.
    # NOTE(review): calling this without `doc` is believed to be deprecated in
    # docling-core -- confirm against the installed version.
    print(document.tables[0].export_to_markdown(doc=document))


if __name__ == "__main__":
    main()
|
||||||
BIN
tests/data/2305.03393v1-table_crop.png
vendored
Normal file
BIN
tests/data/2305.03393v1-table_crop.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
Reference in New Issue
Block a user