mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
feat: Layout model specification and multiple choices (#1910)
* Establish layout_model spec and example instantiations Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Updated naming Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Back to uppercase constants Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix deps issue with openai-whisper>numba>llvmlite Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pull v1 changed test GT from main Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
90
docling/datamodel/layout_model_specs.py
Normal file
90
docling/datamodel/layout_model_specs.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import logging
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LayoutModelConfig(BaseModel):
    """Specification of a downloadable layout model.

    Bundles the information needed to fetch a layout model and locate its
    artifacts on disk: where it is hosted (``repo_id`` / ``revision``) and
    where the artifacts sit relative to the downloaded repository folder
    (``model_path``).
    """

    # Short identifier of the model, e.g. "docling_layout_heron".
    name: str
    # Repository identifier in "<organization>/<repository>" form.
    repo_id: str
    # Revision of the repository to download (e.g. "main").
    revision: str
    # Relative path appended to the repository folder to reach the model
    # artifacts; an empty string leaves the folder unchanged.
    model_path: str
    # Accelerator devices the model is declared to work with.
    supported_devices: list[AcceleratorDevice] = [
        AcceleratorDevice.CPU,
        AcceleratorDevice.CUDA,
        AcceleratorDevice.MPS,
    ]

    @property
    def model_repo_folder(self) -> str:
        """Return the local folder name for this model's repository.

        The name is ``repo_id`` with every "/" replaced by "--", e.g.
        "ds4sd/docling-layout-heron" -> "ds4sd--docling-layout-heron".
        """
        return self.repo_id.replace("/", "--")
|
||||
|
||||
|
||||
# HuggingFace Layout Models
#
# Each constant below is a ready-made LayoutModelConfig. All of them track the
# "main" revision of their repository and use an empty model_path, i.e. the
# artifacts are looked up directly in the repository folder.

# Default Docling Layout Model
DOCLING_LAYOUT_V2 = LayoutModelConfig(
    name="docling_layout_v2",
    repo_id="ds4sd/docling-layout-old",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_HERON = LayoutModelConfig(
    name="docling_layout_heron",
    repo_id="ds4sd/docling-layout-heron",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_HERON_101 = LayoutModelConfig(
    name="docling_layout_heron_101",
    repo_id="ds4sd/docling-layout-heron-101",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_EGRET_MEDIUM = LayoutModelConfig(
    name="docling_layout_egret_medium",
    repo_id="ds4sd/docling-layout-egret-medium",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_EGRET_LARGE = LayoutModelConfig(
    name="docling_layout_egret_large",
    repo_id="ds4sd/docling-layout-egret-large",
    revision="main",
    model_path="",
)

DOCLING_LAYOUT_EGRET_XLARGE = LayoutModelConfig(
    name="docling_layout_egret_xlarge",
    repo_id="ds4sd/docling-layout-egret-xlarge",
    revision="main",
    model_path="",
)

# Example for a hypothetical alternative model
# ALTERNATIVE_LAYOUT = LayoutModelConfig(
#     name="alternative_layout",
#     repo_id="someorg/alternative-layout",
#     revision="main",
#     model_path="model_artifacts/layout_alt",
# )
|
||||
|
||||
|
||||
class LayoutModelType(str, Enum):
    """String-valued identifiers of the available layout models.

    Each member's value equals the ``name`` of the matching
    ``LayoutModelConfig`` constant in this module. Because the enum also
    derives from ``str``, members compare equal to their plain string value.
    """

    DOCLING_LAYOUT_V2 = "docling_layout_v2"
    DOCLING_LAYOUT_HERON = "docling_layout_heron"
    DOCLING_LAYOUT_HERON_101 = "docling_layout_heron_101"
    DOCLING_LAYOUT_EGRET_MEDIUM = "docling_layout_egret_medium"
    DOCLING_LAYOUT_EGRET_LARGE = "docling_layout_egret_large"
    DOCLING_LAYOUT_EGRET_XLARGE = "docling_layout_egret_xlarge"
    # ALTERNATIVE_LAYOUT = "alternative_layout"
|
||||
@@ -16,6 +16,15 @@ from docling.datamodel import asr_model_specs
|
||||
|
||||
# Import the following for backwards compatibility
|
||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||
from docling.datamodel.layout_model_specs import (
|
||||
DOCLING_LAYOUT_EGRET_LARGE,
|
||||
DOCLING_LAYOUT_EGRET_MEDIUM,
|
||||
DOCLING_LAYOUT_EGRET_XLARGE,
|
||||
DOCLING_LAYOUT_HERON,
|
||||
DOCLING_LAYOUT_HERON_101,
|
||||
DOCLING_LAYOUT_V2,
|
||||
LayoutModelConfig,
|
||||
)
|
||||
from docling.datamodel.pipeline_options_asr_model import (
|
||||
InlineAsrOptions,
|
||||
)
|
||||
@@ -270,6 +279,7 @@ class LayoutOptions(BaseModel):
|
||||
"""Options for layout processing."""
|
||||
|
||||
create_orphan_clusters: bool = True # Whether to create clusters for orphaned cells
|
||||
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
|
||||
|
||||
|
||||
class AsrPipelineOptions(PipelineOptions):
|
||||
|
||||
@@ -12,6 +12,7 @@ from PIL import Image
|
||||
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2, LayoutModelConfig
|
||||
from docling.datamodel.pipeline_options import LayoutOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_model import BasePageModel
|
||||
@@ -25,9 +26,6 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LayoutModel(BasePageModel):
|
||||
_model_repo_folder = "ds4sd--docling-models"
|
||||
_model_path = "model_artifacts/layout"
|
||||
|
||||
TEXT_ELEM_LABELS = [
|
||||
DocItemLabel.TEXT,
|
||||
DocItemLabel.FOOTNOTE,
|
||||
@@ -59,25 +57,28 @@ class LayoutModel(BasePageModel):
|
||||
self.options = options
|
||||
|
||||
device = decide_device(accelerator_options.device)
|
||||
layout_model_config = options.model_spec
|
||||
model_repo_folder = layout_model_config.model_repo_folder
|
||||
model_path = layout_model_config.model_path
|
||||
|
||||
if artifacts_path is None:
|
||||
artifacts_path = self.download_models() / self._model_path
|
||||
artifacts_path = (
|
||||
self.download_models(layout_model_config=layout_model_config)
|
||||
/ model_path
|
||||
)
|
||||
else:
|
||||
# will become the default in the future
|
||||
if (artifacts_path / self._model_repo_folder).exists():
|
||||
artifacts_path = (
|
||||
artifacts_path / self._model_repo_folder / self._model_path
|
||||
)
|
||||
elif (artifacts_path / self._model_path).exists():
|
||||
if (artifacts_path / model_repo_folder).exists():
|
||||
artifacts_path = artifacts_path / model_repo_folder / model_path
|
||||
elif (artifacts_path / model_path).exists():
|
||||
warnings.warn(
|
||||
"The usage of artifacts_path containing directly "
|
||||
f"{self._model_path} is deprecated. Please point "
|
||||
f"{model_path} is deprecated. Please point "
|
||||
"the artifacts_path to the parent containing "
|
||||
f"the {self._model_repo_folder} folder.",
|
||||
f"the {model_repo_folder} folder.",
|
||||
DeprecationWarning,
|
||||
stacklevel=3,
|
||||
)
|
||||
artifacts_path = artifacts_path / self._model_path
|
||||
artifacts_path = artifacts_path / model_path
|
||||
|
||||
self.layout_predictor = LayoutPredictor(
|
||||
artifact_path=str(artifacts_path),
|
||||
@@ -90,10 +91,11 @@ class LayoutModel(BasePageModel):
|
||||
local_dir: Optional[Path] = None,
|
||||
force: bool = False,
|
||||
progress: bool = False,
|
||||
layout_model_config: LayoutModelConfig = DOCLING_LAYOUT_V2,
|
||||
) -> Path:
|
||||
return download_hf_model(
|
||||
repo_id="ds4sd/docling-models",
|
||||
revision="v2.2.0",
|
||||
repo_id=layout_model_config.repo_id,
|
||||
revision=layout_model_config.revision,
|
||||
local_dir=local_dir,
|
||||
force=force,
|
||||
progress=progress,
|
||||
|
||||
@@ -10,6 +10,7 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||
from docling.datamodel.base_models import AssembledUnit, Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.layout_model_specs import LayoutModelConfig
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
@@ -36,9 +37,6 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StandardPdfPipeline(PaginatedPipeline):
|
||||
_layout_model_path = LayoutModel._model_path
|
||||
_table_model_path = TableStructureModel._model_path
|
||||
|
||||
def __init__(self, pipeline_options: PdfPipelineOptions):
|
||||
super().__init__(pipeline_options)
|
||||
self.pipeline_options: PdfPipelineOptions
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
|
||||
from docling.datamodel.pipeline_options import (
|
||||
granite_picture_description,
|
||||
smolvlm_picture_description,
|
||||
@@ -46,7 +47,7 @@ def download_models(
|
||||
if with_layout:
|
||||
_log.info("Downloading layout model...")
|
||||
LayoutModel.download_models(
|
||||
local_dir=output_dir / LayoutModel._model_repo_folder,
|
||||
local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
|
||||
force=force,
|
||||
progress=progress,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user