feat: Layout model specification and multiple choices (#1910)

* Establish layout_model spec and example instantiations

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Updated naming

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Back to uppercase constants

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix deps issue with openai-whisper>numba>llvmlite

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Pull v1 changed test GT from main

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-07-10 06:37:27 +02:00
committed by GitHub
parent ec588df971
commit 2b8616d6d5
19 changed files with 923 additions and 791 deletions

View File

@@ -0,0 +1,90 @@
import logging
from enum import Enum
from pathlib import Path
from typing import Optional
from pydantic import BaseModel
from docling.datamodel.accelerator_options import AcceleratorDevice
_log = logging.getLogger(__name__)
class LayoutModelConfig(BaseModel):
    """Specification of a layout model and the repository it comes from.

    Attributes are documented inline; ``model_repo_folder`` derives a
    folder name from ``repo_id``.
    """

    # Short identifier of the model spec.
    name: str
    # Repository identifier; passed as ``repo_id`` when downloading.
    repo_id: str
    # Repository revision; passed as ``revision`` when downloading.
    revision: str
    # Sub-path appended to the artifacts location to reach the model files.
    model_path: str
    # Accelerator devices listed for this model; CPU, CUDA and MPS by default.
    supported_devices: list[AcceleratorDevice] = [
        AcceleratorDevice.CPU,
        AcceleratorDevice.CUDA,
        AcceleratorDevice.MPS,
    ]

    @property
    def model_repo_folder(self) -> str:
        """Return ``repo_id`` with every ``/`` replaced by ``--``."""
        repo_parts = self.repo_id.split("/")
        return "--".join(repo_parts)
# Layout model specs hosted on HuggingFace.
# Every spec below uses the "main" revision and an empty ``model_path``.

# Default Docling layout model.
DOCLING_LAYOUT_V2 = LayoutModelConfig(
    name="docling_layout_v2",
    repo_id="ds4sd/docling-layout-old",
    revision="main",
    model_path="",
)

# Heron variants.
DOCLING_LAYOUT_HERON = LayoutModelConfig(
    name="docling_layout_heron",
    repo_id="ds4sd/docling-layout-heron",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
    name="docling_layout_heron_101",
    repo_id="ds4sd/docling-layout-heron-101",
    revision="main",
    model_path="",
)

# Egret variants (medium, large, xlarge).
DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
    name="docling_layout_egret_medium",
    repo_id="ds4sd/docling-layout-egret-medium",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
    name="docling_layout_egret_large",
    repo_id="ds4sd/docling-layout-egret-large",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
    name="docling_layout_egret_xlarge",
    repo_id="ds4sd/docling-layout-egret-xlarge",
    revision="main",
    model_path="",
)

# Template for registering a hypothetical alternative model whose files
# sit in a sub-folder of its repository:
#
# ALTERNATIVE_LAYOUT = LayoutModelConfig(
#     name="alternative_layout",
#     repo_id="someorg/alternative-layout",
#     revision="main",
#     model_path="model_artifacts/layout_alt",
# )
class LayoutModelType(str, Enum):
    """String-valued identifiers of the available layout models.

    Each member is also a ``str``; its value is the lowercase form of the
    member name.
    """

    # Docling v2 layout model.
    DOCLING_LAYOUT_V2 = "docling_layout_v2"

    # Heron variants.
    DOCLING_LAYOUT_HERON = "docling_layout_heron"
    DOCLING_LAYOUT_HERON_101 = "docling_layout_heron_101"

    # Egret variants.
    DOCLING_LAYOUT_EGRET_MEDIUM = "docling_layout_egret_medium"
    DOCLING_LAYOUT_EGRET_LARGE = "docling_layout_egret_large"
    DOCLING_LAYOUT_EGRET_XLARGE = "docling_layout_egret_xlarge"

    # Placeholder for a hypothetical alternative model:
    # ALTERNATIVE_LAYOUT = "alternative_layout"

View File

@@ -16,6 +16,15 @@ from docling.datamodel import asr_model_specs
# Import the following for backwards compatibility
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.layout_model_specs import (
DOCLING_LAYOUT_EGRET_LARGE,
DOCLING_LAYOUT_EGRET_MEDIUM,
DOCLING_LAYOUT_EGRET_XLARGE,
DOCLING_LAYOUT_HERON,
DOCLING_LAYOUT_HERON_101,
DOCLING_LAYOUT_V2,
LayoutModelConfig,
)
from docling.datamodel.pipeline_options_asr_model import (
InlineAsrOptions,
)
@@ -270,6 +279,7 @@ class LayoutOptions(BaseModel):
"""Options for layout processing."""
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
class AsrPipelineOptions(PipelineOptions):

View File

@@ -12,6 +12,7 @@ from PIL import Image
from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2, LayoutModelConfig
from docling.datamodel.pipeline_options import LayoutOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
@@ -25,9 +26,6 @@ _log = logging.getLogger(__name__)
class LayoutModel(BasePageModel):
_model_repo_folder = "ds4sd--docling-models"
_model_path = "model_artifacts/layout"
TEXT_ELEM_LABELS = [
DocItemLabel.TEXT,
DocItemLabel.FOOTNOTE,
@@ -59,25 +57,28 @@ class LayoutModel(BasePageModel):
self.options = options
device = decide_device(accelerator_options.device)
layout_model_config = options.model_spec
model_repo_folder = layout_model_config.model_repo_folder
model_path = layout_model_config.model_path
if artifacts_path is None:
artifacts_path = self.download_models() / self._model_path
artifacts_path = (
self.download_models(layout_model_config=layout_model_config)
/ model_path
)
else:
# will become the default in the future
if (artifacts_path / self._model_repo_folder).exists():
artifacts_path = (
artifacts_path / self._model_repo_folder / self._model_path
)
elif (artifacts_path / self._model_path).exists():
if (artifacts_path / model_repo_folder).exists():
artifacts_path = artifacts_path / model_repo_folder / model_path
elif (artifacts_path / model_path).exists():
warnings.warn(
"The usage of artifacts_path containing directly "
f"{self._model_path} is deprecated. Please point "
f"{model_path} is deprecated. Please point "
"the artifacts_path to the parent containing "
f"the {self._model_repo_folder} folder.",
f"the {model_repo_folder} folder.",
DeprecationWarning,
stacklevel=3,
)
artifacts_path = artifacts_path / self._model_path
artifacts_path = artifacts_path / model_path
self.layout_predictor = LayoutPredictor(
artifact_path=str(artifacts_path),
@@ -90,10 +91,11 @@ class LayoutModel(BasePageModel):
local_dir: Optional[Path] = None,
force: bool = False,
progress: bool = False,
layout_model_config: LayoutModelConfig = DOCLING_LAYOUT_V2,
) -> Path:
return download_hf_model(
repo_id="ds4sd/docling-models",
revision="v2.2.0",
repo_id=layout_model_config.repo_id,
revision=layout_model_config.revision,
local_dir=local_dir,
force=force,
progress=progress,

View File

@@ -10,6 +10,7 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.pdf_backend import PdfDocumentBackend
from docling.datamodel.base_models import AssembledUnit, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.layout_model_specs import LayoutModelConfig
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel
@@ -36,9 +37,6 @@ _log = logging.getLogger(__name__)
class StandardPdfPipeline(PaginatedPipeline):
_layout_model_path = LayoutModel._model_path
_table_model_path = TableStructureModel._model_path
def __init__(self, pipeline_options: PdfPipelineOptions):
super().__init__(pipeline_options)
self.pipeline_options: PdfPipelineOptions

View File

@@ -2,6 +2,7 @@ import logging
from pathlib import Path
from typing import Optional
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
from docling.datamodel.pipeline_options import (
granite_picture_description,
smolvlm_picture_description,
@@ -46,7 +47,7 @@ def download_models(
if with_layout:
_log.info("Downloading layout model...")
LayoutModel.download_models(
local_dir=output_dir / LayoutModel._model_repo_folder,
local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
force=force,
progress=progress,
)