Refactored the code and added vlm2stage as a CLI option

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2025-07-10 06:48:34 +02:00
parent dcf6fd6a41
commit 0f395688b8
3 changed files with 8 additions and 3 deletions

View File

@ -12,8 +12,6 @@ from pydantic import (
) )
from typing_extensions import deprecated from typing_extensions import deprecated
from docling.datamodel import asr_model_specs
# Import the following for backwards compatibility # Import the following for backwards compatibility
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.asr_model_specs import ( from docling.datamodel.asr_model_specs import (

View File

@ -90,7 +90,7 @@ class ApiVlmOptions(BaseVlmOptions):
response_format: ResponseFormat response_format: ResponseFormat
class TwoStageVlmOptions(BaseVlmOptions): class TwoStageVlmOptions(BaseModel):
kind: Literal["inline_two_stage_model_options"] = "inline_two_stage_model_options" kind: Literal["inline_two_stage_model_options"] = "inline_two_stage_model_options"
vlm_options: InlineVlmOptions vlm_options: InlineVlmOptions

View File

@ -6,12 +6,14 @@ from pydantic import (
) )
from docling.datamodel.accelerator_options import AcceleratorDevice from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.datamodel.layout_model_specs import docling_layout_heron
from docling.datamodel.pipeline_options_vlm_model import ( from docling.datamodel.pipeline_options_vlm_model import (
ApiVlmOptions, ApiVlmOptions,
InferenceFramework, InferenceFramework,
InlineVlmOptions, InlineVlmOptions,
ResponseFormat, ResponseFormat,
TransformersModelType, TransformersModelType,
TwoStageVlmOptions,
) )
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -137,8 +139,13 @@ GEMMA3_27B_MLX = InlineVlmOptions(
temperature=0.0, temperature=0.0,
) )
VLM2STAGE = TwoStageVlmOptions(
vlm_options=SMOLDOCLING_MLX, layout_options=docling_layout_heron
)
class VlmModelType(str, Enum): class VlmModelType(str, Enum):
SMOLDOCLING = "smoldocling" SMOLDOCLING = "smoldocling"
GRANITE_VISION = "granite_vision" GRANITE_VISION = "granite_vision"
GRANITE_VISION_OLLAMA = "granite_vision_ollama" GRANITE_VISION_OLLAMA = "granite_vision_ollama"
VLM2STAGE = "docling2stage"