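Fix artifacts_path handling in Layout+VLM pipeline

Resolve a model-specific directory under artifacts_path for the layout
model (model_repo_folder) and for the inline VLM model
(repo_cache_folder), instead of passing the same base path to both.
When artifacts_path is set but the expected model directory is missing,
raise a RuntimeError naming the base path and the model directory. The
pipeline-local fallback to settings.artifacts_path is removed in favour
of self.artifacts_path.

Also add a test that a custom pipeline_cls configured for
InputFormat.IMAGE is preserved by DocumentConverter.
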
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-12-03 12:52:22 +01:00
parent c97715f5fd
commit 92e4f2220a
2 changed files with 45 additions and 18 deletions
--- a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py
+++ b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py
@@ -28,7 +28,6 @@ from docling.datamodel.pipeline_options_vlm_model import (
    InferenceFramework,
    InlineVlmOptions,
 )
-from docling.datamodel.settings import settings
 from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
    ThreadedLayoutVlmPipelineOptions,
 )
@@ -68,11 +67,15 @@ class ThreadedLayoutVlmPipeline(BasePipeline):

    def _init_models(self) -> None:
        """Initialize layout and VLM models."""
-        art_path = self._resolve_artifacts_path()
+        base_artifacts_path = self.artifacts_path
+        layout_artifacts_path = self._resolve_model_artifacts_path(
+            base_artifacts_path,
+            self.pipeline_options.layout_options.model_spec.model_repo_folder,
+        )

        # Layout model
        self.layout_model = LayoutModel(
-            artifacts_path=art_path,
+            artifacts_path=layout_artifacts_path,
            accelerator_options=self.pipeline_options.accelerator_options,
            options=self.pipeline_options.layout_options,
        )
@@ -147,17 +150,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
                vlm_options=vlm_options,
            )
        elif isinstance(base_vlm_options, InlineVlmOptions):
+            vlm_artifacts_path = self._resolve_model_artifacts_path(
+                base_artifacts_path, vlm_options.repo_cache_folder
+            )
            if vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
                self.vlm_model = HuggingFaceTransformersVlmModel(
                    enabled=True,
-                    artifacts_path=art_path,
+                    artifacts_path=vlm_artifacts_path,
                    accelerator_options=self.pipeline_options.accelerator_options,
                    vlm_options=vlm_options,
                )
            elif vlm_options.inference_framework == InferenceFramework.MLX:
                self.vlm_model = HuggingFaceMlxModel(
                    enabled=True,
-                    artifacts_path=art_path,
+                    artifacts_path=vlm_artifacts_path,
                    accelerator_options=self.pipeline_options.accelerator_options,
                    vlm_options=vlm_options,
                )
@@ -166,7 +172,7 @@ class ThreadedLayoutVlmPipeline(BasePipeline):

                self.vlm_model = VllmVlmModel(
                    enabled=True,
-                    artifacts_path=art_path,
+                    artifacts_path=vlm_artifacts_path,
                    accelerator_options=self.pipeline_options.accelerator_options,
                    vlm_options=vlm_options,
                )
@@ -177,19 +183,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
        else:
            raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}")

-    def _resolve_artifacts_path(self) -> Optional[Path]:
-        """Resolve artifacts path from options or settings."""
-        if self.pipeline_options.artifacts_path:
-            p = Path(self.pipeline_options.artifacts_path).expanduser()
-        elif settings.artifacts_path:
-            p = Path(settings.artifacts_path).expanduser()
-        else:
+    def _resolve_model_artifacts_path(
+        self, base_path: Optional[Path], model_slug: str
+    ) -> Optional[Path]:
+        """Resolve the artifacts path for a specific model using its slug."""
+        if base_path is None:
            return None
-        if not p.is_dir():
+
+        candidate = base_path / model_slug
+        if candidate.is_dir():
+            return candidate
+
        raise RuntimeError(
-                f"{p} does not exist or is not a directory containing the required models"
+            f"Expected artifacts_path {base_path} to contain model directory {model_slug}"
        )
-        return p

    def _create_run_ctx(self) -> RunContext:
        """Create pipeline stages and wire them together."""
--- a/tests/test_options.py
+++ b/tests/test_options.py
@@ -6,6 +6,7 @@ import pytest
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
+from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
@@ -13,9 +14,11 @@ from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
    PdfPipelineOptions,
    TableFormerMode,
+    VlmPipelineOptions,
 )
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline
+from docling.pipeline.vlm_pipeline import VlmPipeline


@pytest.fixture
@@ -192,6 +195,23 @@ def test_parser_backends(test_doc_path):
        assert doc_result.status == ConversionStatus.SUCCESS


+def test_image_pipeline_preserves_custom_pipeline_cls():
+    with pytest.warns(DeprecationWarning):
+        converter = DocumentConverter(
+            format_options={
+                InputFormat.IMAGE: PdfFormatOption(
+                    pipeline_cls=VlmPipeline,
+                    pipeline_options=VlmPipelineOptions(),
+                )
+            }
+        )
+
+    image_format_option = converter.format_to_options[InputFormat.IMAGE]
+
+    assert image_format_option.pipeline_cls is VlmPipeline
+    assert image_format_option.backend is ImageDocumentBackend
+
+
 def test_confidence(test_doc_path):
    converter = DocumentConverter()
    doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))