feat: Add option plumbing for OllamaVlmOptions in pipeline_options

Branch: OllamaVlmModel

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
Gabe Goodhart 2025-04-08 13:29:03 -06:00
parent 17f381da4f
commit 5902d9e1c1

View File

@@ -266,6 +266,7 @@ class ResponseFormat(str, Enum):
class InferenceFramework(str, Enum):
    """Inference backends that can execute a VLM.

    Values are plain strings (str mixin) so they round-trip cleanly through
    pydantic settings and CLI arguments.
    """

    MLX = "mlx"
    TRANSFORMERS = "transformers"
    OLLAMA = "ollama"  # remote/local Ollama server, new in this commit
class HuggingFaceVlmOptions(BaseVlmOptions): class HuggingFaceVlmOptions(BaseVlmOptions):
@@ -284,6 +285,16 @@ class HuggingFaceVlmOptions(BaseVlmOptions):
return self.repo_id.replace("/", "--") return self.repo_id.replace("/", "--")
class OllamaVlmOptions(BaseVlmOptions):
    """Options for running a VLM through an Ollama server.

    Parallels ``HuggingFaceVlmOptions`` but targets a (typically local)
    Ollama HTTP endpoint instead of an in-process model.
    """

    # Discriminator tag for pydantic union dispatch against other
    # BaseVlmOptions subclasses.
    kind: Literal["ollama_model_options"] = "ollama_model_options"
    # Ollama model tag, e.g. "granite3.2-vision:2b".
    model_id: str
    # Base URL of the Ollama server; default is Ollama's standard local port.
    base_url: str = "http://localhost:11434"
    # Context window override passed to Ollama; None keeps the model default.
    # NOTE(review): `int | None` annotation syntax requires Python 3.10+
    # (or `from __future__ import annotations`) — confirm the project's
    # minimum Python version, otherwise use Optional[int].
    num_ctx: int | None = None
    # Page-image render scale factor before sending to the model.
    scale: float = 2.0
    # Expected output format of the model response (no default: caller
    # must choose, e.g. ResponseFormat.MARKDOWN).
    response_format: ResponseFormat
smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions( smoldocling_vlm_mlx_conversion_options = HuggingFaceVlmOptions(
repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16", repo_id="ds4sd/SmolDocling-256M-preview-mlx-bf16",
prompt="Convert this page to docling.", prompt="Convert this page to docling.",
@@ -307,10 +318,18 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
inference_framework=InferenceFramework.TRANSFORMERS, inference_framework=InferenceFramework.TRANSFORMERS,
) )
# Preset options for running Granite Vision 3.2 (2B) via an Ollama server,
# prompting for a plain OCR-to-markdown conversion of each page.
granite_vision_vlm_ollama_conversion_options = OllamaVlmOptions(
    model_id="granite3.2-vision:2b",
    prompt="OCR the full page to markdown.",
    scale=1.0,  # PEP 8: no spaces around '=' in keyword arguments (was `scale = 1.0`)
    response_format=ResponseFormat.MARKDOWN,
)
class VlmModelType(str, Enum):
    """Selectable VLM model presets for the conversion pipeline."""

    SMOLDOCLING = "smoldocling"
    GRANITE_VISION = "granite_vision"
    GRANITE_VISION_OLLAMA = "granite_vision_ollama"  # Granite Vision served by Ollama
# Define an enum for the backend options # Define an enum for the backend options