mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
Fix artifacts_path handling in Layout+VLM pipeline
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -28,7 +28,6 @@ from docling.datamodel.pipeline_options_vlm_model import (
|
|||||||
InferenceFramework,
|
InferenceFramework,
|
||||||
InlineVlmOptions,
|
InlineVlmOptions,
|
||||||
)
|
)
|
||||||
from docling.datamodel.settings import settings
|
|
||||||
from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
|
from docling.experimental.datamodel.threaded_layout_vlm_pipeline_options import (
|
||||||
ThreadedLayoutVlmPipelineOptions,
|
ThreadedLayoutVlmPipelineOptions,
|
||||||
)
|
)
|
||||||
@@ -68,11 +67,15 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
|
|||||||
|
|
||||||
def _init_models(self) -> None:
|
def _init_models(self) -> None:
|
||||||
"""Initialize layout and VLM models."""
|
"""Initialize layout and VLM models."""
|
||||||
art_path = self._resolve_artifacts_path()
|
base_artifacts_path = self.artifacts_path
|
||||||
|
layout_artifacts_path = self._resolve_model_artifacts_path(
|
||||||
|
base_artifacts_path,
|
||||||
|
self.pipeline_options.layout_options.model_spec.model_repo_folder,
|
||||||
|
)
|
||||||
|
|
||||||
# Layout model
|
# Layout model
|
||||||
self.layout_model = LayoutModel(
|
self.layout_model = LayoutModel(
|
||||||
artifacts_path=art_path,
|
artifacts_path=layout_artifacts_path,
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
options=self.pipeline_options.layout_options,
|
options=self.pipeline_options.layout_options,
|
||||||
)
|
)
|
||||||
@@ -147,17 +150,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
|
|||||||
vlm_options=vlm_options,
|
vlm_options=vlm_options,
|
||||||
)
|
)
|
||||||
elif isinstance(base_vlm_options, InlineVlmOptions):
|
elif isinstance(base_vlm_options, InlineVlmOptions):
|
||||||
|
vlm_artifacts_path = self._resolve_model_artifacts_path(
|
||||||
|
base_artifacts_path, vlm_options.repo_cache_folder
|
||||||
|
)
|
||||||
if vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
|
if vlm_options.inference_framework == InferenceFramework.TRANSFORMERS:
|
||||||
self.vlm_model = HuggingFaceTransformersVlmModel(
|
self.vlm_model = HuggingFaceTransformersVlmModel(
|
||||||
enabled=True,
|
enabled=True,
|
||||||
artifacts_path=art_path,
|
artifacts_path=vlm_artifacts_path,
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
vlm_options=vlm_options,
|
vlm_options=vlm_options,
|
||||||
)
|
)
|
||||||
elif vlm_options.inference_framework == InferenceFramework.MLX:
|
elif vlm_options.inference_framework == InferenceFramework.MLX:
|
||||||
self.vlm_model = HuggingFaceMlxModel(
|
self.vlm_model = HuggingFaceMlxModel(
|
||||||
enabled=True,
|
enabled=True,
|
||||||
artifacts_path=art_path,
|
artifacts_path=vlm_artifacts_path,
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
vlm_options=vlm_options,
|
vlm_options=vlm_options,
|
||||||
)
|
)
|
||||||
@@ -166,7 +172,7 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
|
|||||||
|
|
||||||
self.vlm_model = VllmVlmModel(
|
self.vlm_model = VllmVlmModel(
|
||||||
enabled=True,
|
enabled=True,
|
||||||
artifacts_path=art_path,
|
artifacts_path=vlm_artifacts_path,
|
||||||
accelerator_options=self.pipeline_options.accelerator_options,
|
accelerator_options=self.pipeline_options.accelerator_options,
|
||||||
vlm_options=vlm_options,
|
vlm_options=vlm_options,
|
||||||
)
|
)
|
||||||
@@ -177,19 +183,20 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}")
|
raise ValueError(f"Unsupported VLM options type: {type(base_vlm_options)}")
|
||||||
|
|
||||||
def _resolve_artifacts_path(self) -> Optional[Path]:
|
def _resolve_model_artifacts_path(
|
||||||
"""Resolve artifacts path from options or settings."""
|
self, base_path: Optional[Path], model_slug: str
|
||||||
if self.pipeline_options.artifacts_path:
|
) -> Optional[Path]:
|
||||||
p = Path(self.pipeline_options.artifacts_path).expanduser()
|
"""Resolve the artifacts path for a specific model using its slug."""
|
||||||
elif settings.artifacts_path:
|
if base_path is None:
|
||||||
p = Path(settings.artifacts_path).expanduser()
|
|
||||||
else:
|
|
||||||
return None
|
return None
|
||||||
if not p.is_dir():
|
|
||||||
|
candidate = base_path / model_slug
|
||||||
|
if candidate.is_dir():
|
||||||
|
return candidate
|
||||||
|
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"{p} does not exist or is not a directory containing the required models"
|
f"Expected artifacts_path {base_path} to contain model directory {model_slug}"
|
||||||
)
|
)
|
||||||
return p
|
|
||||||
|
|
||||||
def _create_run_ctx(self) -> RunContext:
|
def _create_run_ctx(self) -> RunContext:
|
||||||
"""Create pipeline stages and wire them together."""
|
"""Create pipeline stages and wire them together."""
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import pytest
|
|||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||||
|
from docling.backend.image_backend import ImageDocumentBackend
|
||||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||||
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
|
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
|
||||||
@@ -13,9 +14,11 @@ from docling.datamodel.document import ConversionResult
|
|||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
TableFormerMode,
|
TableFormerMode,
|
||||||
|
VlmPipelineOptions,
|
||||||
)
|
)
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline
|
from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline
|
||||||
|
from docling.pipeline.vlm_pipeline import VlmPipeline
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -192,6 +195,23 @@ def test_parser_backends(test_doc_path):
|
|||||||
assert doc_result.status == ConversionStatus.SUCCESS
|
assert doc_result.status == ConversionStatus.SUCCESS
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_pipeline_preserves_custom_pipeline_cls():
|
||||||
|
with pytest.warns(DeprecationWarning):
|
||||||
|
converter = DocumentConverter(
|
||||||
|
format_options={
|
||||||
|
InputFormat.IMAGE: PdfFormatOption(
|
||||||
|
pipeline_cls=VlmPipeline,
|
||||||
|
pipeline_options=VlmPipelineOptions(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
image_format_option = converter.format_to_options[InputFormat.IMAGE]
|
||||||
|
|
||||||
|
assert image_format_option.pipeline_cls is VlmPipeline
|
||||||
|
assert image_format_option.backend is ImageDocumentBackend
|
||||||
|
|
||||||
|
|
||||||
def test_confidence(test_doc_path):
|
def test_confidence(test_doc_path):
|
||||||
converter = DocumentConverter()
|
converter = DocumentConverter()
|
||||||
doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))
|
doc_result: ConversionResult = converter.convert(test_doc_path, page_range=(6, 9))
|
||||||
|
|||||||
Reference in New Issue
Block a user