Fix artifacts_path handling in Layout+VLM pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-12-08 20:58:11 +00:00 · 2025-12-03 12:52:22 +01:00
parent c97715f5fd
commit 92e4f2220a
2 changed files with 45 additions and 18 deletions
--- a/tests/test_options.py
+++ b/tests/test_options.py
@@ -6,6 +6,7 @@ import pytest
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
+from docling.backend.image_backend import ImageDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
@@ -13,9 +14,11 @@ from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
    PdfPipelineOptions,
    TableFormerMode,
+    VlmPipelineOptions,
 )
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline
+from docling.pipeline.vlm_pipeline import VlmPipeline


@pytest.fixture
@@ -192,6 +195,23 @@ def test_parser_backends(test_doc_path):
        assert doc_result.status == ConversionStatus.SUCCESS


+def test_image_pipeline_preserves_custom_pipeline_cls():
+    with pytest.warns(DeprecationWarning):  # construction is expected to warn
+        converter = DocumentConverter(
+            format_options={
+                InputFormat.IMAGE: PdfFormatOption(  # PDF option on IMAGE input
+                    pipeline_cls=VlmPipeline,
+                    pipeline_options=VlmPipelineOptions(),
+                )
+            }
+        )
+
+    image_format_option = converter.format_to_options[InputFormat.IMAGE]
+
+    assert image_format_option.pipeline_cls is VlmPipeline  # custom class is kept
+    assert image_format_option.backend is ImageDocumentBackend  # image backend in use
+
+
 def test_confidence(test_doc_path):
    converter = DocumentConverter()
    doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))