From 0f395688b8caa72c9c427b15216fc438273deb4c Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Thu, 10 Jul 2025 06:48:34 +0200 Subject: [PATCH] refactored the code and added vlm2stage as a cli option Signed-off-by: Peter Staar --- docling/datamodel/pipeline_options.py | 2 -- docling/datamodel/pipeline_options_vlm_model.py | 2 +- docling/datamodel/vlm_model_specs.py | 7 +++++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 2b76a553..cea2594e 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -12,8 +12,6 @@ from pydantic import ( ) from typing_extensions import deprecated -from docling.datamodel import asr_model_specs - # Import the following for backwards compatibility from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.asr_model_specs import ( diff --git a/docling/datamodel/pipeline_options_vlm_model.py b/docling/datamodel/pipeline_options_vlm_model.py index a38f0414..c5ed2e32 100644 --- a/docling/datamodel/pipeline_options_vlm_model.py +++ b/docling/datamodel/pipeline_options_vlm_model.py @@ -90,7 +90,7 @@ class ApiVlmOptions(BaseVlmOptions): response_format: ResponseFormat -class TwoStageVlmOptions(BaseVlmOptions): +class TwoStageVlmOptions(BaseModel): kind: Literal["inline_two_stage_model_options"] = "inline_two_stage_model_options" vlm_options: InlineVlmOptions diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py index 5045c846..c8eefe3e 100644 --- a/docling/datamodel/vlm_model_specs.py +++ b/docling/datamodel/vlm_model_specs.py @@ -6,12 +6,14 @@ from pydantic import ( ) from docling.datamodel.accelerator_options import AcceleratorDevice +from docling.datamodel.layout_model_specs import docling_layout_heron from docling.datamodel.pipeline_options_vlm_model import ( ApiVlmOptions, InferenceFramework, InlineVlmOptions, ResponseFormat, TransformersModelType, + TwoStageVlmOptions, ) _log = logging.getLogger(__name__) @@ -137,8 +139,13 @@ GEMMA3_27B_MLX = InlineVlmOptions( temperature=0.0, ) +VLM2STAGE = TwoStageVlmOptions( + vlm_options=SMOLDOCLING_MLX, layout_options=docling_layout_heron +) + class VlmModelType(str, Enum): SMOLDOCLING = "smoldocling" GRANITE_VISION = "granite_vision" GRANITE_VISION_OLLAMA = "granite_vision_ollama" + VLM2STAGE = "docling2stage"