feat: Connect "granite_vision_ollama" pipeline option to CLI

Branch: OllamaVlmModel

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
Gabe Goodhart 2025-04-08 13:29:50 -06:00
parent 219d8db626
commit 72dd815195
3 changed files with 16 additions and 2 deletions

View File

@@ -40,6 +40,7 @@ from docling.datamodel.pipeline_options import (
     VlmModelType,
     VlmPipelineOptions,
     granite_vision_vlm_conversion_options,
+    granite_vision_vlm_ollama_conversion_options,
     smoldocling_vlm_conversion_options,
     smoldocling_vlm_mlx_conversion_options,
 )
@@ -535,6 +536,8 @@ def convert(
         if vlm_model == VlmModelType.GRANITE_VISION:
             pipeline_options.vlm_options = granite_vision_vlm_conversion_options
+        elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
+            pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options
         elif vlm_model == VlmModelType.SMOLDOCLING:
             pipeline_options.vlm_options = smoldocling_vlm_conversion_options
             if sys.platform == "darwin":

View File

@@ -33,6 +33,7 @@ class _AvailableModels(str, Enum):
     PICTURE_CLASSIFIER = "picture_classifier"
     SMOLVLM = "smolvlm"
     GRANITE_VISION = "granite_vision"
+    GRANITE_VISION_OLLAMA = "granite_vision_ollama"
     EASYOCR = "easyocr"

View File

@@ -15,13 +15,16 @@ from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.datamodel.base_models import InputFormat, Page
 from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import (
+    HuggingFaceVlmOptions,
     InferenceFramework,
+    OllamaVlmOptions,
     ResponseFormat,
     VlmPipelineOptions,
 )
 from docling.datamodel.settings import settings
 from docling.models.hf_mlx_model import HuggingFaceMlxModel
 from docling.models.hf_vlm_model import HuggingFaceVlmModel
+from docling.models.ollama_vlm_model import OllamaVlmModel
 from docling.pipeline.base_pipeline import PaginatedPipeline
 from docling.utils.profiling import ProfilingScope, TimeRecorder
@@ -57,7 +60,14 @@ class VlmPipeline(PaginatedPipeline):
         self.keep_images = self.pipeline_options.generate_page_images

-        if (
+        if isinstance(pipeline_options.vlm_options, OllamaVlmOptions):
+            self.build_pipe = [
+                OllamaVlmModel(
+                    enabled=True,  # must be always enabled for this pipeline to make sense.
+                    vlm_options=self.pipeline_options.vlm_options,
+                ),
+            ]
+        elif (
             self.pipeline_options.vlm_options.inference_framework
             == InferenceFramework.MLX
         ):
@@ -69,7 +79,7 @@ class VlmPipeline(PaginatedPipeline):
                     vlm_options=self.pipeline_options.vlm_options,
                 ),
             ]
-        else:
+        elif isinstance(pipeline_options.vlm_options, HuggingFaceVlmOptions):
             self.build_pipe = [
                 HuggingFaceVlmModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.