From 72dd815195909db67088eb268263c4b3294db726 Mon Sep 17 00:00:00 2001 From: Gabe Goodhart Date: Tue, 8 Apr 2025 13:29:50 -0600 Subject: [PATCH] feat: Connect "granite_vision_ollama" pipeline option to CLI Branch: OllamaVlmModel Signed-off-by: Gabe Goodhart --- docling/cli/main.py | 3 +++ docling/cli/models.py | 1 + docling/pipeline/vlm_pipeline.py | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index e0f0cbd8..71527b92 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -40,6 +40,7 @@ from docling.datamodel.pipeline_options import ( VlmModelType, VlmPipelineOptions, granite_vision_vlm_conversion_options, + granite_vision_vlm_ollama_conversion_options, smoldocling_vlm_conversion_options, smoldocling_vlm_mlx_conversion_options, ) @@ -535,6 +536,8 @@ def convert( if vlm_model == VlmModelType.GRANITE_VISION: pipeline_options.vlm_options = granite_vision_vlm_conversion_options + elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA: + pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options elif vlm_model == VlmModelType.SMOLDOCLING: pipeline_options.vlm_options = smoldocling_vlm_conversion_options if sys.platform == "darwin": diff --git a/docling/cli/models.py b/docling/cli/models.py index 7bc313c1..5d38fe50 100644 --- a/docling/cli/models.py +++ b/docling/cli/models.py @@ -33,6 +33,7 @@ class _AvailableModels(str, Enum): PICTURE_CLASSIFIER = "picture_classifier" SMOLVLM = "smolvlm" GRANITE_VISION = "granite_vision" + GRANITE_VISION_OLLAMA = "granite_vision_ollama" EASYOCR = "easyocr" diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index d4defa89..3590d068 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -15,13 +15,16 @@ from docling.backend.pdf_backend import PdfDocumentBackend from docling.datamodel.base_models import InputFormat, Page from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.pipeline_options import ( + HuggingFaceVlmOptions, InferenceFramework, + OllamaVlmOptions, ResponseFormat, VlmPipelineOptions, ) from docling.datamodel.settings import settings from docling.models.hf_mlx_model import HuggingFaceMlxModel from docling.models.hf_vlm_model import HuggingFaceVlmModel +from docling.models.ollama_vlm_model import OllamaVlmModel from docling.pipeline.base_pipeline import PaginatedPipeline from docling.utils.profiling import ProfilingScope, TimeRecorder @@ -57,7 +60,14 @@ class VlmPipeline(PaginatedPipeline): self.keep_images = self.pipeline_options.generate_page_images - if ( + if isinstance(pipeline_options.vlm_options, OllamaVlmOptions): + self.build_pipe = [ + OllamaVlmModel( + enabled=True, # must be always enabled for this pipeline to make sense. + vlm_options=self.pipeline_options.vlm_options, + ), + ] + elif ( self.pipeline_options.vlm_options.inference_framework == InferenceFramework.MLX ): @@ -69,7 +79,7 @@ class VlmPipeline(PaginatedPipeline): vlm_options=self.pipeline_options.vlm_options, ), ] - else: + elif isinstance(pipeline_options.vlm_options, HuggingFaceVlmOptions): self.build_pipe = [ HuggingFaceVlmModel( enabled=True, # must be always enabled for this pipeline to make sense.