Refactored the code and added vlm2stage as a CLI option

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-07-10 06:48:34 +02:00
parent dcf6fd6a41
commit 0f395688b8
3 changed files with 8 additions and 3 deletions

View File

@ -12,8 +12,6 @@ from pydantic import (
)
from typing_extensions import deprecated
from docling.datamodel import asr_model_specs
# Import the following for backwards compatibility
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.asr_model_specs import (

View File

@ -90,7 +90,7 @@ class ApiVlmOptions(BaseVlmOptions):
response_format: ResponseFormat
class TwoStageVlmOptions(BaseVlmOptions):
class TwoStageVlmOptions(BaseModel):
kind: Literal["inline_two_stage_model_options"] = "inline_two_stage_model_options"
vlm_options: InlineVlmOptions

View File

@ -6,12 +6,14 @@ from pydantic import (
)
from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.datamodel.layout_model_specs import docling_layout_heron
from docling.datamodel.pipeline_options_vlm_model import (
ApiVlmOptions,
InferenceFramework,
InlineVlmOptions,
ResponseFormat,
TransformersModelType,
TwoStageVlmOptions,
)
_log = logging.getLogger(__name__)
@ -137,8 +139,13 @@ GEMMA3_27B_MLX = InlineVlmOptions(
temperature=0.0,
)
# Two-stage preset: SMOLDOCLING_MLX supplies the VLM options and
# docling_layout_heron supplies the layout options.
VLM2STAGE = TwoStageVlmOptions(
    vlm_options=SMOLDOCLING_MLX,
    layout_options=docling_layout_heron,
)
# String-valued enumeration of VLM model identifiers. Mixing in `str` makes
# each member compare equal to its raw string value.
# NOTE(review): presumably these values are the choices exposed on the CLI
# (the commit message mentions a new CLI option) — confirm against the caller.
class VlmModelType(str, Enum):
SMOLDOCLING = "smoldocling"
GRANITE_VISION = "granite_vision"
GRANITE_VISION_OLLAMA = "granite_vision_ollama"
# NOTE(review): the member is named VLM2STAGE but its value is
# "docling2stage" — confirm the naming mismatch is intended.
VLM2STAGE = "docling2stage"