Fix artifacts_path handling in Layout+VLM pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-12-03 12:52:22 +01:00
parent c97715f5fd
commit 92e4f2220a
2 changed files with 45 additions and 18 deletions

View File

@@ -28,7 +28,6 @@ from docling.datamodel.pipeline_options_vlm_model import (
InferenceFramework,
InlineVlmOptions,
)
from docling.datamodel.settings import settings
from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
ThreadedLayoutVlmPipelineOptions,
)
@@ -68,11 +67,15 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
def _init_models(self) -> None:
"""Initialize layout and VLM models."""
art_path = self._resolve_artifacts_path()
base_artifacts_path = self.artifacts_path
layout_artifacts_path = self._resolve_model_artifacts_path(
base_artifacts_path,
self.pipeline_options.layout_options.model_spec.model_repo_folder,
)
# Layout model
self.layout_model = LayoutModel(
artifacts_path=art_path,
artifacts_path=layout_artifacts_path,
accelerator_options=self.pipeline_options.accelerator_options,
options=self.pipeline_options.layout_options,
)
@@ -147,17 +150,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
vlm_options=vlm_options,
)
elif isinstance(base_vlm_options, InlineVlmOptions):
vlm_artifacts_path = self._resolve_model_artifacts_path(
base_artifacts_path, vlm_options.repo_cache_folder
)
if vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
self.vlm_model = HuggingFaceTransformersVlmModel(
enabled=True,
artifacts_path=art_path,
artifacts_path=vlm_artifacts_path,
accelerator_options=self.pipeline_options.accelerator_options,
vlm_options=vlm_options,
)
elif vlm_options.inference_framework == InferenceFramework.MLX:
self.vlm_model = HuggingFaceMlxModel(
enabled=True,
artifacts_path=art_path,
artifacts_path=vlm_artifacts_path,
accelerator_options=self.pipeline_options.accelerator_options,
vlm_options=vlm_options,
)
@@ -166,7 +172,7 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
self.vlm_model = VllmVlmModel(
enabled=True,
artifacts_path=art_path,
artifacts_path=vlm_artifacts_path,
accelerator_options=self.pipeline_options.accelerator_options,
vlm_options=vlm_options,
)
@@ -177,19 +183,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
else:
raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}")
def _resolve_artifacts_path(self) -> Optional[Path]:
"""Resolve artifacts path from options or settings."""
if self.pipeline_options.artifacts_path:
p = Path(self.pipeline_options.artifacts_path).expanduser()
elif settings.artifacts_path:
p = Path(settings.artifacts_path).expanduser()
else:
def _resolve_model_artifacts_path(
self, base_path: Optional[Path], model_slug: str
) -> Optional[Path]:
"""Resolve the artifacts path for a specific model using its slug."""
if base_path is None:
return None
if not p.is_dir():
candidate = base_path / model_slug
if candidate.is_dir():
return candidate
raise RuntimeError(
f"{p} does not exist or is not a directory containing the required models"
f"Expected artifacts_path {base_path} to contain model directory {model_slug}"
)
return p
def _create_run_ctx(self) -> RunContext:
"""Create pipeline stages and wire them together."""

View File

@@ -6,6 +6,7 @@ import pytest
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
from docling.backend.image_backend import ImageDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
@@ -13,9 +14,11 @@ from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
PdfPipelineOptions,
TableFormerMode,
VlmPipelineOptions,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline
from docling.pipeline.vlm_pipeline import VlmPipeline
@pytest.fixture
@@ -192,6 +195,23 @@ def test_parser_backends(test_doc_path):
assert doc_result.status == ConversionStatus.SUCCESS
def test_image_pipeline_preserves_custom_pipeline_cls():
with pytest.warns(DeprecationWarning):
converter = DocumentConverter(
format_options={
InputFormat.IMAGE: PdfFormatOption(
pipeline_cls=VlmPipeline,
pipeline_options=VlmPipelineOptions(),
)
}
)
image_format_option = converter.format_to_options[InputFormat.IMAGE]
assert image_format_option.pipeline_cls is VlmPipeline
assert image_format_option.backend is ImageDocumentBackend
def test_confidence(test_doc_path):
converter = DocumentConverter()
doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))