mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: Factory and plugin-capability for Layout and Table models (#2637)
* feat: Scaffolding for layout and table model plugin factory Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add missing files Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add base options classes for layout and table Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -59,9 +59,14 @@ class TableFormerMode(str, Enum):
|
|||||||
ACCURATE = "accurate"
|
ACCURATE = "accurate"
|
||||||
|
|
||||||
|
|
||||||
class TableStructureOptions(BaseModel):
|
class BaseTableStructureOptions(BaseOptions):
|
||||||
|
"""Base options for table structure models."""
|
||||||
|
|
||||||
|
|
||||||
|
class TableStructureOptions(BaseTableStructureOptions):
|
||||||
"""Options for the table structure."""
|
"""Options for the table structure."""
|
||||||
|
|
||||||
|
kind: ClassVar[str] = "docling_tableformer"
|
||||||
do_cell_matching: bool = (
|
do_cell_matching: bool = (
|
||||||
True
|
True
|
||||||
# True: Matches predictions back to PDF cells. Can break table output if PDF cells
|
# True: Matches predictions back to PDF cells. Can break table output if PDF cells
|
||||||
@@ -308,19 +313,25 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LayoutOptions(BaseModel):
|
class BaseLayoutOptions(BaseOptions):
|
||||||
"""Options for layout processing."""
|
"""Base options for layout models."""
|
||||||
|
|
||||||
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
|
||||||
keep_empty_clusters: bool = (
|
keep_empty_clusters: bool = (
|
||||||
False # Whether to keep clusters that contain no text cells
|
False # Whether to keep clusters that contain no text cells
|
||||||
)
|
)
|
||||||
model_spec: LayoutModelConfig = DOCLING_LAYOUT_HERON
|
|
||||||
skip_cell_assignment: bool = (
|
skip_cell_assignment: bool = (
|
||||||
False # Skip cell-to-cluster assignment for VLM-only processing
|
False # Skip cell-to-cluster assignment for VLM-only processing
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LayoutOptions(BaseLayoutOptions):
    """Options for layout processing."""

    # Identifier string for this options class; presumably the key used to
    # match options to a registered layout engine -- confirm against the factory.
    kind: ClassVar[str] = "docling_layout_default"

    # Whether to create clusters for orphaned cells
    create_orphan_clusters: bool = True

    # Layout model configuration; defaults to DOCLING_LAYOUT_HERON.
    model_spec: LayoutModelConfig = DOCLING_LAYOUT_HERON
|
||||||
|
|
||||||
|
|
||||||
class AsrPipelineOptions(PipelineOptions):
|
class AsrPipelineOptions(PipelineOptions):
|
||||||
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
|
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
|
||||||
|
|
||||||
|
|||||||
39
docling/models/base_layout_model.py
Normal file
39
docling/models/base_layout_model.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from collections.abc import Iterable, Sequence
|
||||||
|
from typing import Type
|
||||||
|
|
||||||
|
from docling.datamodel.base_models import LayoutPrediction, Page
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.datamodel.pipeline_options import BaseLayoutOptions
|
||||||
|
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
||||||
|
|
||||||
|
|
||||||
|
class BaseLayoutModel(BasePageModel, BaseModelWithOptions, ABC):
    """Shared interface for layout models.

    Subclasses implement ``get_options_type`` and ``predict_layout``; the
    page-model ``__call__`` entry point is provided here and delegates to
    ``predict_layout``.
    """

    @classmethod
    @abstractmethod
    def get_options_type(cls) -> Type[BaseLayoutOptions]:
        """Return the options type supported by this layout model."""

    @abstractmethod
    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Produce layout predictions for the provided pages.

        Implementations must return exactly one prediction per input page,
        in the same order as ``pages``.
        """

    def __call__(
        self,
        conv_res: ConversionResult,
        page_batch: Iterable[Page],
    ) -> Iterable[Page]:
        """Run layout prediction on a batch and yield the updated pages.

        The batch is materialized into a list, passed to ``predict_layout``,
        and each returned prediction is stored on ``page.predictions.layout``
        before the page is yielded.

        Raises:
            ValueError: if ``predict_layout`` does not return one prediction
                per page.
        """
        pages = list(page_batch)
        predictions = list(self.predict_layout(conv_res, pages))

        # zip() stops at the shorter input, so a count mismatch would silently
        # drop pages from the pipeline; fail loudly instead.
        if len(predictions) != len(pages):
            raise ValueError(
                f"predict_layout returned {len(predictions)} predictions "
                f"for {len(pages)} pages"
            )

        for page, prediction in zip(pages, predictions):
            page.predictions.layout = prediction
            yield page
|
||||||
45
docling/models/base_table_model.py
Normal file
45
docling/models/base_table_model.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from collections.abc import Iterable, Sequence
|
||||||
|
from typing import Type
|
||||||
|
|
||||||
|
from docling.datamodel.base_models import Page, TableStructurePrediction
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.datamodel.pipeline_options import BaseTableStructureOptions
|
||||||
|
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTableStructureModel(BasePageModel, BaseModelWithOptions, ABC):
    """Shared interface for table structure models.

    Subclasses implement ``get_options_type`` and ``predict_tables``; the
    page-model ``__call__`` entry point is provided here and delegates to
    ``predict_tables``.
    """

    # When false, __call__ passes pages through without running prediction.
    enabled: bool

    @classmethod
    @abstractmethod
    def get_options_type(cls) -> Type[BaseTableStructureOptions]:
        """Return the options type supported by this table model."""

    @abstractmethod
    def predict_tables(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[TableStructurePrediction]:
        """Produce table structure predictions for the provided pages.

        Implementations must return exactly one prediction per input page,
        in the same order as ``pages``.
        """

    def __call__(
        self,
        conv_res: ConversionResult,
        page_batch: Iterable[Page],
    ) -> Iterable[Page]:
        """Run table structure prediction on a batch and yield the pages.

        If the model is disabled the pages are yielded untouched. Otherwise
        the batch is materialized, passed to ``predict_tables``, and each
        returned prediction is stored on ``page.predictions.tablestructure``
        before the page is yielded.

        Raises:
            ValueError: if ``predict_tables`` does not return one prediction
                per page.
        """
        # getattr with a default: a subclass that never sets ``enabled`` is
        # treated as enabled.
        if not getattr(self, "enabled", True):
            yield from page_batch
            return

        pages = list(page_batch)
        predictions = list(self.predict_tables(conv_res, pages))

        # zip() stops at the shorter input, so a count mismatch would silently
        # drop pages from the pipeline; fail loudly instead.
        if len(predictions) != len(pages):
            raise ValueError(
                f"predict_tables returned {len(predictions)} predictions "
                f"for {len(pages)} pages"
            )

        for page, prediction in zip(pages, predictions):
            page.predictions.tablestructure = prediction
            yield page
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
|
from docling.models.factories.layout_factory import LayoutFactory
|
||||||
from docling.models.factories.ocr_factory import OcrFactory
|
from docling.models.factories.ocr_factory import OcrFactory
|
||||||
from docling.models.factories.picture_description_factory import (
|
from docling.models.factories.picture_description_factory import (
|
||||||
PictureDescriptionFactory,
|
PictureDescriptionFactory,
|
||||||
)
|
)
|
||||||
|
from docling.models.factories.table_factory import TableStructureFactory
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -25,3 +27,21 @@ def get_picture_description_factory(
|
|||||||
factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
|
factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
|
||||||
logger.info("Registered picture descriptions: %r", factory.registered_kind)
|
logger.info("Registered picture descriptions: %r", factory.registered_kind)
|
||||||
return factory
|
return factory
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
def get_layout_factory(allow_external_plugins: bool = False) -> LayoutFactory:
    """Build a ``LayoutFactory`` with its plugins loaded.

    The result is memoized by ``lru_cache``, so repeated calls with the same
    ``allow_external_plugins`` value return the same factory instance.
    """
    layout_factory = LayoutFactory()
    layout_factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
    logger.info("Registered layout engines: %r", layout_factory.registered_kind)
    return layout_factory
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache
def get_table_structure_factory(
    allow_external_plugins: bool = False,
) -> TableStructureFactory:
    """Build a ``TableStructureFactory`` with its plugins loaded.

    The result is memoized by ``lru_cache``, so repeated calls with the same
    ``allow_external_plugins`` value return the same factory instance.
    """
    table_factory = TableStructureFactory()
    table_factory.load_from_plugins(allow_external_plugins=allow_external_plugins)
    logger.info(
        "Registered table structure engines: %r", table_factory.registered_kind
    )
    return table_factory
|
||||||
|
|||||||
7
docling/models/factories/layout_factory.py
Normal file
7
docling/models/factories/layout_factory.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from docling.models.base_layout_model import BaseLayoutModel
|
||||||
|
from docling.models.factories.base_factory import BaseFactory
|
||||||
|
|
||||||
|
|
||||||
|
class LayoutFactory(BaseFactory[BaseLayoutModel]):
    """Factory for ``BaseLayoutModel`` implementations."""

    def __init__(self, *args, **kwargs):
        # "layout_engines" is passed as BaseFactory's first positional
        # argument; presumably the plugin group name -- confirm in BaseFactory.
        super().__init__("layout_engines", *args, **kwargs)
|
||||||
7
docling/models/factories/table_factory.py
Normal file
7
docling/models/factories/table_factory.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from docling.models.base_table_model import BaseTableStructureModel
|
||||||
|
from docling.models.factories.base_factory import BaseFactory
|
||||||
|
|
||||||
|
|
||||||
|
class TableStructureFactory(BaseFactory[BaseTableStructureModel]):
    """Factory for ``BaseTableStructureModel`` implementations."""

    def __init__(self, *args, **kwargs):
        # "table_structure_engines" is passed as BaseFactory's first positional
        # argument; presumably the plugin group name -- confirm in BaseFactory.
        super().__init__("table_structure_engines", *args, **kwargs)
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Iterable
|
from collections.abc import Sequence
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
@@ -15,7 +15,7 @@ from docling.datamodel.document import ConversionResult
|
|||||||
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2, LayoutModelConfig
|
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2, LayoutModelConfig
|
||||||
from docling.datamodel.pipeline_options import LayoutOptions
|
from docling.datamodel.pipeline_options import LayoutOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_layout_model import BaseLayoutModel
|
||||||
from docling.models.utils.hf_model_download import download_hf_model
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
||||||
@@ -25,7 +25,7 @@ from docling.utils.visualization import draw_clusters
|
|||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LayoutModel(BasePageModel):
|
class LayoutModel(BaseLayoutModel):
|
||||||
TEXT_ELEM_LABELS = [
|
TEXT_ELEM_LABELS = [
|
||||||
DocItemLabel.TEXT,
|
DocItemLabel.TEXT,
|
||||||
DocItemLabel.FOOTNOTE,
|
DocItemLabel.FOOTNOTE,
|
||||||
@@ -86,6 +86,10 @@ class LayoutModel(BasePageModel):
|
|||||||
num_threads=accelerator_options.num_threads,
|
num_threads=accelerator_options.num_threads,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_options_type(cls) -> type[LayoutOptions]:
|
||||||
|
return LayoutOptions
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def download_models(
|
def download_models(
|
||||||
local_dir: Optional[Path] = None,
|
local_dir: Optional[Path] = None,
|
||||||
@@ -145,11 +149,13 @@ class LayoutModel(BasePageModel):
|
|||||||
out_file = out_path / f"{mode_prefix}_layout_page_{page.page_no:05}.png"
|
out_file = out_path / f"{mode_prefix}_layout_page_{page.page_no:05}.png"
|
||||||
combined_image.save(str(out_file), format="png")
|
combined_image.save(str(out_file), format="png")
|
||||||
|
|
||||||
def __call__(
|
def predict_layout(
|
||||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
self,
|
||||||
) -> Iterable[Page]:
|
conv_res: ConversionResult,
|
||||||
# Convert to list to allow multiple iterations
|
pages: Sequence[Page],
|
||||||
pages = list(page_batch)
|
) -> Sequence[LayoutPrediction]:
|
||||||
|
# Convert to list to ensure predictable iteration
|
||||||
|
pages = list(pages)
|
||||||
|
|
||||||
# Separate valid and invalid pages
|
# Separate valid and invalid pages
|
||||||
valid_pages = []
|
valid_pages = []
|
||||||
@@ -167,12 +173,6 @@ class LayoutModel(BasePageModel):
|
|||||||
valid_pages.append(page)
|
valid_pages.append(page)
|
||||||
valid_page_images.append(page_image)
|
valid_page_images.append(page_image)
|
||||||
|
|
||||||
_log.debug(f"{len(pages)=}")
|
|
||||||
if pages:
|
|
||||||
_log.debug(f"{pages[0].page_no}-{pages[-1].page_no}")
|
|
||||||
_log.debug(f"{len(valid_pages)=}")
|
|
||||||
_log.debug(f"{len(valid_page_images)=}")
|
|
||||||
|
|
||||||
# Process all valid pages with batch prediction
|
# Process all valid pages with batch prediction
|
||||||
batch_predictions = []
|
batch_predictions = []
|
||||||
if valid_page_images:
|
if valid_page_images:
|
||||||
@@ -182,11 +182,14 @@ class LayoutModel(BasePageModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Process each page with its predictions
|
# Process each page with its predictions
|
||||||
|
layout_predictions: list[LayoutPrediction] = []
|
||||||
valid_page_idx = 0
|
valid_page_idx = 0
|
||||||
for page in pages:
|
for page in pages:
|
||||||
assert page._backend is not None
|
assert page._backend is not None
|
||||||
if not page._backend.is_valid():
|
if not page._backend.is_valid():
|
||||||
yield page
|
existing_prediction = page.predictions.layout or LayoutPrediction()
|
||||||
|
page.predictions.layout = existing_prediction
|
||||||
|
layout_predictions.append(existing_prediction)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
page_predictions = batch_predictions[valid_page_idx]
|
page_predictions = batch_predictions[valid_page_idx]
|
||||||
@@ -233,11 +236,14 @@ class LayoutModel(BasePageModel):
|
|||||||
np.mean([c.confidence for c in processed_cells if c.from_ocr])
|
np.mean([c.confidence for c in processed_cells if c.from_ocr])
|
||||||
)
|
)
|
||||||
|
|
||||||
page.predictions.layout = LayoutPrediction(clusters=processed_clusters)
|
prediction = LayoutPrediction(clusters=processed_clusters)
|
||||||
|
page.predictions.layout = prediction
|
||||||
|
|
||||||
if settings.debug.visualize_layout:
|
if settings.debug.visualize_layout:
|
||||||
self.draw_clusters_and_cells_side_by_side(
|
self.draw_clusters_and_cells_side_by_side(
|
||||||
conv_res, page, processed_clusters, mode_prefix="postprocessed"
|
conv_res, page, processed_clusters, mode_prefix="postprocessed"
|
||||||
)
|
)
|
||||||
|
|
||||||
yield page
|
layout_predictions.append(prediction)
|
||||||
|
|
||||||
|
return layout_predictions
|
||||||
|
|||||||
@@ -28,3 +28,23 @@ def picture_description():
|
|||||||
PictureDescriptionApiModel,
|
PictureDescriptionApiModel,
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def layout_engines():
    """Return the built-in layout engines, keyed by "layout_engines"."""
    # Imported inside the function so the model module is only loaded when
    # this hook is actually called.
    from docling.models.layout_model import LayoutModel

    engines = [
        LayoutModel,
    ]
    return {"layout_engines": engines}
|
||||||
|
|
||||||
|
|
||||||
|
def table_structure_engines():
    """Return the built-in table structure engines, keyed by "table_structure_engines"."""
    # Imported inside the function so the model module is only loaded when
    # this hook is actually called.
    from docling.models.table_structure_model import TableStructureModel

    engines = [
        TableStructureModel,
    ]
    return {"table_structure_engines": engines}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import copy
|
import copy
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable, Sequence
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -20,13 +20,13 @@ from docling.datamodel.pipeline_options import (
|
|||||||
TableStructureOptions,
|
TableStructureOptions,
|
||||||
)
|
)
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_table_model import BaseTableStructureModel
|
||||||
from docling.models.utils.hf_model_download import download_hf_model
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
from docling.utils.profiling import TimeRecorder
|
from docling.utils.profiling import TimeRecorder
|
||||||
|
|
||||||
|
|
||||||
class TableStructureModel(BasePageModel):
|
class TableStructureModel(BaseTableStructureModel):
|
||||||
_model_repo_folder = "docling-project--docling-models"
|
_model_repo_folder = "docling-project--docling-models"
|
||||||
_model_path = "model_artifacts/tableformer"
|
_model_path = "model_artifacts/tableformer"
|
||||||
|
|
||||||
@@ -88,6 +88,10 @@ class TableStructureModel(BasePageModel):
|
|||||||
)
|
)
|
||||||
self.scale = 2.0 # Scale up table input images to 144 dpi
|
self.scale = 2.0 # Scale up table input images to 144 dpi
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_options_type(cls) -> type[TableStructureOptions]:
|
||||||
|
return TableStructureOptions
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def download_models(
|
def download_models(
|
||||||
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
||||||
@@ -167,25 +171,30 @@ class TableStructureModel(BasePageModel):
|
|||||||
out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
|
out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
|
||||||
image.save(str(out_file), format="png")
|
image.save(str(out_file), format="png")
|
||||||
|
|
||||||
def __call__(
|
def predict_tables(
|
||||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
self,
|
||||||
) -> Iterable[Page]:
|
conv_res: ConversionResult,
|
||||||
if not self.enabled:
|
pages: Sequence[Page],
|
||||||
yield from page_batch
|
) -> Sequence[TableStructurePrediction]:
|
||||||
return
|
pages = list(pages)
|
||||||
|
predictions: list[TableStructurePrediction] = []
|
||||||
|
|
||||||
for page in page_batch:
|
for page in pages:
|
||||||
assert page._backend is not None
|
assert page._backend is not None
|
||||||
if not page._backend.is_valid():
|
if not page._backend.is_valid():
|
||||||
yield page
|
existing_prediction = (
|
||||||
else:
|
page.predictions.tablestructure or TableStructurePrediction()
|
||||||
|
)
|
||||||
|
page.predictions.tablestructure = existing_prediction
|
||||||
|
predictions.append(existing_prediction)
|
||||||
|
continue
|
||||||
|
|
||||||
with TimeRecorder(conv_res, "table_structure"):
|
with TimeRecorder(conv_res, "table_structure"):
|
||||||
assert page.predictions.layout is not None
|
assert page.predictions.layout is not None
|
||||||
assert page.size is not None
|
assert page.size is not None
|
||||||
|
|
||||||
page.predictions.tablestructure = (
|
table_prediction = TableStructurePrediction()
|
||||||
TableStructurePrediction()
|
page.predictions.tablestructure = table_prediction
|
||||||
) # dummy
|
|
||||||
|
|
||||||
in_tables = [
|
in_tables = [
|
||||||
(
|
(
|
||||||
@@ -201,8 +210,8 @@ class TableStructureModel(BasePageModel):
|
|||||||
if cluster.label
|
if cluster.label
|
||||||
in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
|
in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX]
|
||||||
]
|
]
|
||||||
if not len(in_tables):
|
if not in_tables:
|
||||||
yield page
|
predictions.append(table_prediction)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
page_input = {
|
page_input = {
|
||||||
@@ -211,9 +220,6 @@ class TableStructureModel(BasePageModel):
|
|||||||
"image": numpy.asarray(page.get_image(scale=self.scale)),
|
"image": numpy.asarray(page.get_image(scale=self.scale)),
|
||||||
}
|
}
|
||||||
|
|
||||||
table_clusters, table_bboxes = zip(*in_tables)
|
|
||||||
|
|
||||||
if len(table_bboxes):
|
|
||||||
for table_cluster, tbl_box in in_tables:
|
for table_cluster, tbl_box in in_tables:
|
||||||
# Check if word-level cells are available from backend:
|
# Check if word-level cells are available from backend:
|
||||||
sp = page._backend.get_segmented_page()
|
sp = page._backend.get_segmented_page()
|
||||||
@@ -234,9 +240,7 @@ class TableStructureModel(BasePageModel):
|
|||||||
if len(c.text.strip()) > 0:
|
if len(c.text.strip()) > 0:
|
||||||
new_cell = copy.deepcopy(c)
|
new_cell = copy.deepcopy(c)
|
||||||
new_cell.rect = BoundingRectangle.from_bounding_box(
|
new_cell.rect = BoundingRectangle.from_bounding_box(
|
||||||
new_cell.rect.to_bounding_box().scaled(
|
new_cell.rect.to_bounding_box().scaled(scale=self.scale)
|
||||||
scale=self.scale
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
tokens.append(
|
tokens.append(
|
||||||
{
|
{
|
||||||
@@ -257,9 +261,7 @@ class TableStructureModel(BasePageModel):
|
|||||||
the_bbox = BoundingBox.model_validate(
|
the_bbox = BoundingBox.model_validate(
|
||||||
element["bbox"]
|
element["bbox"]
|
||||||
).scaled(1 / self.scale)
|
).scaled(1 / self.scale)
|
||||||
text_piece = page._backend.get_text_in_rect(
|
text_piece = page._backend.get_text_in_rect(the_bbox)
|
||||||
the_bbox
|
|
||||||
)
|
|
||||||
element["bbox"]["token"] = text_piece
|
element["bbox"]["token"] = text_piece
|
||||||
|
|
||||||
tc = TableCell.model_validate(element)
|
tc = TableCell.model_validate(element)
|
||||||
@@ -289,11 +291,8 @@ class TableStructureModel(BasePageModel):
|
|||||||
label=table_cluster.label,
|
label=table_cluster.label,
|
||||||
)
|
)
|
||||||
|
|
||||||
page.predictions.tablestructure.table_map[
|
table_prediction.table_map[table_cluster.id] = tbl
|
||||||
table_cluster.id
|
|
||||||
] = tbl
|
|
||||||
|
|
||||||
# For debugging purposes:
|
|
||||||
if settings.debug.visualize_tables:
|
if settings.debug.visualize_tables:
|
||||||
self.draw_table_and_cells(
|
self.draw_table_and_cells(
|
||||||
conv_res,
|
conv_res,
|
||||||
@@ -301,4 +300,6 @@ class TableStructureModel(BasePageModel):
|
|||||||
page.predictions.tablestructure.table_map.values(),
|
page.predictions.tablestructure.table_map.values(),
|
||||||
)
|
)
|
||||||
|
|
||||||
yield page
|
predictions.append(table_prediction)
|
||||||
|
|
||||||
|
return predictions
|
||||||
|
|||||||
@@ -15,15 +15,17 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
|
|||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_ocr_model import BaseOcrModel
|
from docling.models.base_ocr_model import BaseOcrModel
|
||||||
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
||||||
from docling.models.factories import get_ocr_factory
|
from docling.models.factories import (
|
||||||
from docling.models.layout_model import LayoutModel
|
get_layout_factory,
|
||||||
|
get_ocr_factory,
|
||||||
|
get_table_structure_factory,
|
||||||
|
)
|
||||||
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
||||||
from docling.models.page_preprocessing_model import (
|
from docling.models.page_preprocessing_model import (
|
||||||
PagePreprocessingModel,
|
PagePreprocessingModel,
|
||||||
PagePreprocessingOptions,
|
PagePreprocessingOptions,
|
||||||
)
|
)
|
||||||
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
|
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
|
||||||
from docling.models.table_structure_model import TableStructureModel
|
|
||||||
from docling.pipeline.base_pipeline import PaginatedPipeline
|
from docling.pipeline.base_pipeline import PaginatedPipeline
|
||||||
from docling.utils.model_downloader import download_models
|
from docling.utils.model_downloader import download_models
|
||||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||||
@@ -48,6 +50,24 @@ class LegacyStandardPdfPipeline(PaginatedPipeline):
|
|||||||
|
|
||||||
ocr_model = self.get_ocr_model(artifacts_path=self.artifacts_path)
|
ocr_model = self.get_ocr_model(artifacts_path=self.artifacts_path)
|
||||||
|
|
||||||
|
layout_factory = get_layout_factory(
|
||||||
|
allow_external_plugins=self.pipeline_options.allow_external_plugins
|
||||||
|
)
|
||||||
|
layout_model = layout_factory.create_instance(
|
||||||
|
options=pipeline_options.layout_options,
|
||||||
|
artifacts_path=self.artifacts_path,
|
||||||
|
accelerator_options=pipeline_options.accelerator_options,
|
||||||
|
)
|
||||||
|
table_factory = get_table_structure_factory(
|
||||||
|
allow_external_plugins=self.pipeline_options.allow_external_plugins
|
||||||
|
)
|
||||||
|
table_model = table_factory.create_instance(
|
||||||
|
options=pipeline_options.table_structure_options,
|
||||||
|
enabled=pipeline_options.do_table_structure,
|
||||||
|
artifacts_path=self.artifacts_path,
|
||||||
|
accelerator_options=pipeline_options.accelerator_options,
|
||||||
|
)
|
||||||
|
|
||||||
self.build_pipe = [
|
self.build_pipe = [
|
||||||
# Pre-processing
|
# Pre-processing
|
||||||
PagePreprocessingModel(
|
PagePreprocessingModel(
|
||||||
@@ -58,18 +78,9 @@ class LegacyStandardPdfPipeline(PaginatedPipeline):
|
|||||||
# OCR
|
# OCR
|
||||||
ocr_model,
|
ocr_model,
|
||||||
# Layout model
|
# Layout model
|
||||||
LayoutModel(
|
layout_model,
|
||||||
artifacts_path=self.artifacts_path,
|
|
||||||
accelerator_options=pipeline_options.accelerator_options,
|
|
||||||
options=pipeline_options.layout_options,
|
|
||||||
),
|
|
||||||
# Table structure model
|
# Table structure model
|
||||||
TableStructureModel(
|
table_model,
|
||||||
enabled=pipeline_options.do_table_structure,
|
|
||||||
artifacts_path=self.artifacts_path,
|
|
||||||
options=pipeline_options.table_structure_options,
|
|
||||||
accelerator_options=pipeline_options.accelerator_options,
|
|
||||||
),
|
|
||||||
# Page assemble
|
# Page assemble
|
||||||
PageAssembleModel(options=PageAssembleOptions()),
|
PageAssembleModel(options=PageAssembleOptions()),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -41,15 +41,17 @@ from docling.datamodel.document import ConversionResult
|
|||||||
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
|
from docling.datamodel.pipeline_options import ThreadedPdfPipelineOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions
|
||||||
from docling.models.factories import get_ocr_factory
|
from docling.models.factories import (
|
||||||
from docling.models.layout_model import LayoutModel
|
get_layout_factory,
|
||||||
|
get_ocr_factory,
|
||||||
|
get_table_structure_factory,
|
||||||
|
)
|
||||||
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
||||||
from docling.models.page_preprocessing_model import (
|
from docling.models.page_preprocessing_model import (
|
||||||
PagePreprocessingModel,
|
PagePreprocessingModel,
|
||||||
PagePreprocessingOptions,
|
PagePreprocessingOptions,
|
||||||
)
|
)
|
||||||
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
|
from docling.models.readingorder_model import ReadingOrderModel, ReadingOrderOptions
|
||||||
from docling.models.table_structure_model import TableStructureModel
|
|
||||||
from docling.pipeline.base_pipeline import ConvertPipeline
|
from docling.pipeline.base_pipeline import ConvertPipeline
|
||||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||||
from docling.utils.utils import chunkify
|
from docling.utils.utils import chunkify
|
||||||
@@ -436,15 +438,21 @@ class StandardPdfPipeline(ConvertPipeline):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.ocr_model = self._make_ocr_model(art_path)
|
self.ocr_model = self._make_ocr_model(art_path)
|
||||||
self.layout_model = LayoutModel(
|
layout_factory = get_layout_factory(
|
||||||
|
allow_external_plugins=self.pipeline_options.allow_external_plugins
|
||||||
|
)
|
||||||
|
self.layout_model = layout_factory.create_instance(
|
||||||
|
options=self.pipeline_options.layout_options,
|
||||||
artifacts_path=art_path,
|
artifacts_path=art_path,
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
options=self.pipeline_options.layout_options,
|
|
||||||
)
|
)
|
||||||
self.table_model = TableStructureModel(
|
table_factory = get_table_structure_factory(
|
||||||
|
allow_external_plugins=self.pipeline_options.allow_external_plugins
|
||||||
|
)
|
||||||
|
self.table_model = table_factory.create_instance(
|
||||||
|
options=self.pipeline_options.table_structure_options,
|
||||||
enabled=self.pipeline_options.do_table_structure,
|
enabled=self.pipeline_options.do_table_structure,
|
||||||
artifacts_path=art_path,
|
artifacts_path=art_path,
|
||||||
options=self.pipeline_options.table_structure_options,
|
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
)
|
)
|
||||||
self.assemble_model = PageAssembleModel(options=PageAssembleOptions())
|
self.assemble_model = PageAssembleModel(options=PageAssembleOptions())
|
||||||
|
|||||||
Reference in New Issue
Block a user