feat: Connect "granite_vision_ollama" pipeline option to CLI

Branch: OllamaVlmModel

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
Gabe Goodhart 2025-04-08 13:29:50 -06:00
parent 219d8db626
commit 72dd815195
3 changed files with 16 additions and 2 deletions


@@ -40,6 +40,7 @@ from docling.datamodel.pipeline_options import (
     VlmModelType,
     VlmPipelineOptions,
     granite_vision_vlm_conversion_options,
+    granite_vision_vlm_ollama_conversion_options,
     smoldocling_vlm_conversion_options,
     smoldocling_vlm_mlx_conversion_options,
 )
@@ -535,6 +536,8 @@ def convert(
         if vlm_model == VlmModelType.GRANITE_VISION:
             pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+        elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
+            pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options
         elif vlm_model == VlmModelType.SMOLDOCLING:
             pipeline_options.vlm_options = smoldocling_vlm_conversion_options
             if sys.platform == "darwin":
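
Note: besides the new CLI flag value, the same options can be selected programmatically. The snippet below is a minimal sketch assuming docling's usual DocumentConverter / PdfFormatOption wiring; everything outside this diff (DocumentConverter, PdfFormatOption, the "document.pdf" path) is illustrative, not part of this change.

    # Sketch: selecting the Ollama-served Granite Vision options in code.
    # Only the pipeline-options names come from this diff; the rest is assumed.
    from docling.datamodel.base_models import InputFormat
    from docling.datamodel.pipeline_options import (
        VlmPipelineOptions,
        granite_vision_vlm_ollama_conversion_options,
    )
    from docling.document_converter import DocumentConverter, PdfFormatOption
    from docling.pipeline.vlm_pipeline import VlmPipeline

    pipeline_options = VlmPipelineOptions()
    pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options

    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_cls=VlmPipeline,
                pipeline_options=pipeline_options,
            )
        }
    )
    result = converter.convert("document.pdf")  # input path is illustrative
    print(result.document.export_to_markdown())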


@@ -33,6 +33,7 @@ class _AvailableModels(str, Enum):
     PICTURE_CLASSIFIER = "picture_classifier"
     SMOLVLM = "smolvlm"
     GRANITE_VISION = "granite_vision"
+    GRANITE_VISION_OLLAMA = "granite_vision_ollama"
     EASYOCR = "easyocr"


@@ -15,13 +15,16 @@ from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
+    HuggingFaceVlmOptions,
     InferenceFramework,
+    OllamaVlmOptions,
     ResponseFormat,
     VlmPipelineOptions,
 )
 from docling.datamodel.settings import settings
 from docling.models.hf_mlx_model import HuggingFaceMlxModel
 from docling.models.hf_vlm_model import HuggingFaceVlmModel
+from docling.models.ollama_vlm_model import OllamaVlmModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
 from docling.utils.profiling import ProfilingScope, TimeRecorder
@@ -57,7 +60,14 @@ class VlmPipeline(PaginatedPipeline):
         self.keep_images = self.pipeline_options.generate_page_images

-        if (
+        if isinstance(pipeline_options.vlm_options, OllamaVlmOptions):
+            self.build_pipe = [
+                OllamaVlmModel(
+                    enabled=True,  # must be always enabled for this pipeline to make sense.
+                    vlm_options=self.pipeline_options.vlm_options,
+                ),
+            ]
+        elif (
             self.pipeline_options.vlm_options.inference_framework
             == InferenceFramework.MLX
         ):
@@ -69,7 +79,7 @@
                     vlm_options=self.pipeline_options.vlm_options,
                 ),
             ]
-        else:
+        elif isinstance(pipeline_options.vlm_options, HuggingFaceVlmOptions):
             self.build_pipe = [
                 HuggingFaceVlmModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
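
The OllamaVlmModel imported above is introduced in the parent commit and is not shown in this diff. As a rough illustration only, a model of this kind typically renders the page to an image and sends it to a local Ollama server; the sketch below assumes Ollama's standard /api/generate endpoint and a hypothetical model name, and is not the implementation from this branch.

    # Illustrative only: one way a page image can be sent to a local Ollama server.
    # The endpoint is Ollama's standard /api/generate API; the helper and the
    # model name are assumptions, not code from this commit.
    import base64
    import io

    import requests
    from PIL import Image


    def ollama_generate_markdown(page_image: Image.Image, model: str = "granite3.2-vision") -> str:
        buf = io.BytesIO()
        page_image.save(buf, format="PNG")
        payload = {
            "model": model,
            "prompt": "Convert this page to markdown.",
            "images": [base64.b64encode(buf.getvalue()).decode("utf-8")],
            "stream": False,
        }
        resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=120)
        resp.raise_for_status()
        return resp.json()["response"]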