From 92e4f2220a17b9ac0e4a6aa7655d88497d01be27 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 3 Dec 2025 12:52:22 +0100 Subject: [PATCH] Fix artifacts_path handling in Layout+VLM pipeline Signed-off-by: Christoph Auer --- .../pipeline/threaded_layout_vlm_pipeline.py | 43 +++++++++++-------- tests/test_options.py | 20 +++++++++ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py index 92c0c104..31b1b95d 100644 --- a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +++ b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py @@ -28,7 +28,6 @@ from docling.datamodel.pipeline_options_vlm_model import ( InferenceFramework, InlineVlmOptions, ) -from docling.datamodel.settings import settings from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import ( ThreadedLayoutVlmPipelineOptions, ) @@ -68,11 +67,15 @@ class ThreadedLayoutVlmPipeline(BasePipeline): def _init_models(self) -> None: """Initialize layout and VLM models.""" - art_path = self._resolve_artifacts_path() + base_artifacts_path = self.artifacts_path + layout_artifacts_path = self._resolve_model_artifacts_path( + base_artifacts_path, + self.pipeline_options.layout_options.model_spec.model_repo_folder, + ) # Layout model self.layout_model = LayoutModel( - artifacts_path=art_path, + artifacts_path=layout_artifacts_path, accelerator_options=self.pipeline_options.accelerator_options, options=self.pipeline_options.layout_options, ) @@ -147,17 +150,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline): vlm_options=vlm_options, ) elif isinstance(base_vlm_options, InlineVlmOptions): + vlm_artifacts_path = self._resolve_model_artifacts_path( + base_artifacts_path, vlm_options.repo_cache_folder + ) if vlm_options.inference_framework == InferenceFramework.TRANSFORMERS: self.vlm_model = HuggingFaceTransformersVlmModel( enabled=True, - 
artifacts_path=art_path, + artifacts_path=vlm_artifacts_path, accelerator_options=self.pipeline_options.accelerator_options, vlm_options=vlm_options, ) elif vlm_options.inference_framework == InferenceFramework.MLX: self.vlm_model = HuggingFaceMlxModel( enabled=True, - artifacts_path=art_path, + artifacts_path=vlm_artifacts_path, accelerator_options=self.pipeline_options.accelerator_options, vlm_options=vlm_options, ) @@ -166,7 +172,7 @@ class ThreadedLayoutVlmPipeline(BasePipeline): self.vlm_model = VllmVlmModel( enabled=True, - artifacts_path=art_path, + artifacts_path=vlm_artifacts_path, accelerator_options=self.pipeline_options.accelerator_options, vlm_options=vlm_options, ) @@ -177,19 +183,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline): else: raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}") - def _resolve_artifacts_path(self) -> Optional[Path]: - """Resolve artifacts path from options or settings.""" - if self.pipeline_options.artifacts_path: - p = Path(self.pipeline_options.artifacts_path).expanduser() - elif settings.artifacts_path: - p = Path(settings.artifacts_path).expanduser() - else: + def _resolve_model_artifacts_path( + self, base_path: Optional[Path], model_slug: str + ) -> Optional[Path]: + """Resolve the artifacts path for a specific model using its slug.""" + if base_path is None: return None - if not p.is_dir(): - raise RuntimeError( - f"{p} does not exist or is not a directory containing the required models" - ) - return p + + candidate = base_path / model_slug + if candidate.is_dir(): + return candidate + + raise RuntimeError( + f"Expected artifacts_path {base_path} to contain model directory {model_slug}" + ) def _create_run_ctx(self) -> RunContext: """Create pipeline stages and wire them together.""" diff --git a/tests/test_options.py b/tests/test_options.py index 1e06378f..a06db690 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -6,6 +6,7 @@ import pytest from 
def test_image_pipeline_preserves_custom_pipeline_cls():
    """A user-supplied pipeline_cls for IMAGE input survives converter setup.

    The converter must keep the explicitly requested VlmPipeline (rather
    than silently replacing it with the default image pipeline) while still
    wiring up the image backend.
    """
    # Construction happens inside the warns-context on purpose: the
    # DeprecationWarning is part of the expected behavior here.
    with pytest.warns(DeprecationWarning):
        converter = DocumentConverter(
            format_options={
                InputFormat.IMAGE: PdfFormatOption(
                    pipeline_cls=VlmPipeline,
                    pipeline_options=VlmPipelineOptions(),
                )
            }
        )

    resolved_option = converter.format_to_options[InputFormat.IMAGE]

    assert resolved_option.pipeline_cls is VlmPipeline
    assert resolved_option.backend is ImageDocumentBackend