From 90da15f61103408cb31316afe3e47142cc82160a Mon Sep 17 00:00:00 2001
From: Peter Staar
Date: Fri, 20 Jun 2025 07:47:12 +0200
Subject: [PATCH] initial reference to granite-docling

Add a GRANITEDOCLING_TRANSFORMERS spec (ibm-granite/granite-docling-256m-preview,
DocTags output, Transformers backend, CPU/CUDA/MPS), a matching
VlmModelType.GRANITE_DOCLING enum member, and wire the new model type into
the CLI `convert` command.

Signed-off-by: Peter Staar
---
Review notes (this section is not part of the commit message):
- The disabled MLX fallback, previously added as a bare triple-quoted string
  in the new `elif` branch, is replaced by a TODO comment. It referenced
  GRANITEDOCLING_MLX, which this patch does not define, and reused the
  SmolDocling warning text. Runtime behaviour is unchanged.
- Hunk and diffstat counts were recomputed for the shorter hunk; the
  post-image blob id on the main.py `index` line is therefore stale.
- Leading whitespace was reconstructed from a whitespace-collapsed copy of
  the patch; confirm it against the target files before applying.

 docling/cli/main.py                  |  5 +++++
 docling/datamodel/vlm_model_specs.py | 17 +++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/docling/cli/main.py b/docling/cli/main.py
index 083f53b2..b4046825 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -50,6 +50,7 @@ from docling.datamodel.settings import settings
 from docling.datamodel.vlm_model_specs import (
     GRANITE_VISION_OLLAMA,
     GRANITE_VISION_TRANSFORMERS,
+    GRANITEDOCLING_TRANSFORMERS,
     SMOLDOCLING_MLX,
     SMOLDOCLING_TRANSFORMERS,
     VlmModelType,
@@ -595,6 +596,10 @@ def convert(  # noqa: C901
                             "To run SmolDocling faster, please install mlx-vlm:\n"
                             "pip install mlx-vlm"
                         )
+            elif vlm_model == VlmModelType.GRANITE_DOCLING:
+                pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
+                # TODO: on macOS (sys.platform == "darwin"), switch to an MLX
+                # variant when mlx_vlm is importable, once such a spec exists.
 
             pdf_format_option = PdfFormatOption(
                 pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
diff --git a/docling/datamodel/vlm_model_specs.py b/docling/datamodel/vlm_model_specs.py
index 5045c846..43beb537 100644
--- a/docling/datamodel/vlm_model_specs.py
+++ b/docling/datamodel/vlm_model_specs.py
@@ -43,6 +43,22 @@ SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
     temperature=0.0,
 )
 
+# GraniteDocling
+GRANITEDOCLING_TRANSFORMERS = InlineVlmOptions(
+    repo_id="ibm-granite/granite-docling-256m-preview",
+    prompt="Convert this page to docling.",
+    response_format=ResponseFormat.DOCTAGS,
+    inference_framework=InferenceFramework.TRANSFORMERS,
+    transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.MPS,
+    ],
+    scale=2.0,
+    temperature=0.0,
+)
+
 # GraniteVision
 GRANITE_VISION_TRANSFORMERS = InlineVlmOptions(
     repo_id="ibm-granite/granite-vision-3.2-2b",
@@ -140,5 +156,6 @@ GEMMA3_27B_MLX = InlineVlmOptions(
 
 class VlmModelType(str, Enum):
     SMOLDOCLING = "smoldocling"
+    GRANITE_DOCLING = "granite_docling"
     GRANITE_VISION = "granite_vision"
     GRANITE_VISION_OLLAMA = "granite_vision_ollama"