From 1aa522792ac682600c6e8e4a46c5a25bbe447a05 Mon Sep 17 00:00:00 2001
From: Christoph Auer <cau@zurich.ibm.com>
Date: Fri, 15 Aug 2025 14:49:34 +0200
Subject: [PATCH] Tweak VLM defaults: vLLM GPU memory cap, SmolDocling for all tasks

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---
 .../models/vlm_models_inline/vllm_model.py    |  9 +++---
 .../threaded_multistage_vlm_pipeline.py       | 31 +++++++++++++++----
 2 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/docling/models/vlm_models_inline/vllm_model.py b/docling/models/vlm_models_inline/vllm_model.py
index 61c84cde..bc18ea32 100644
--- a/docling/models/vlm_models_inline/vllm_model.py
+++ b/docling/models/vlm_models_inline/vllm_model.py
@@ -61,16 +61,15 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
             # Initialize VLLM LLM
             llm_kwargs = {
                 "model": str(artifacts_path),
-                "model_impl": "transformers",
                 "limit_mm_per_prompt": {"image": 1},
                 "trust_remote_code": vlm_options.trust_remote_code,
+                "model_impl": "transformers",
+                "gpu_memory_utilization": 0.3,  # Hardcoded for now: capping each engine at 30% of GPU memory leaves room for ~3 different models.
             }
 
             # Add device-specific configurations
-            if self.device.startswith("cuda"):
-                # VLLM automatically detects GPU
-                pass
-            elif self.device == "cpu":
+
+            if self.device == "cpu":
                 llm_kwargs["device"] = "cpu"
 
             # Add quantization if specified
diff --git a/docling/pipeline/threaded_multistage_vlm_pipeline.py b/docling/pipeline/threaded_multistage_vlm_pipeline.py
index 3ea2483a..9921af87 100644
--- a/docling/pipeline/threaded_multistage_vlm_pipeline.py
+++ b/docling/pipeline/threaded_multistage_vlm_pipeline.py
@@ -55,6 +55,7 @@ from docling.datamodel.pipeline_options_vlm_model import (
 from docling.datamodel.settings import settings
 from docling.datamodel.vlm_model_specs import (
     DOLPHIN_TRANSFORMERS,
+    SMOLDOCLING_MLX,
     SMOLDOCLING_TRANSFORMERS,
 )
 from docling.models.layout_model import LayoutModel
@@ -152,13 +153,27 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
         """Create default pipeline options with custom VLM configurations from example."""
 
         # Configure VLM options based on the custom pipeline example
-        formula_opts = DOLPHIN_TRANSFORMERS.model_copy()
-        formula_opts.prompt = "<s>Read text in the image. <Answer/>"
+        # formula_opts = DOLPHIN_TRANSFORMERS.model_copy()
+        # formula_opts.prompt = "<s>Read text in the image. <Answer/>"
 
-        text_opts = DOLPHIN_TRANSFORMERS.model_copy()
-        text_opts.prompt = "<s>Read text in the image. <Answer/>"
+        # text_opts = DOLPHIN_TRANSFORMERS.model_copy()
+        # text_opts.prompt = "<s>Read text in the image. <Answer/>"
 
-        table_opts = SMOLDOCLING_TRANSFORMERS.model_copy()
+        base_model = SMOLDOCLING_TRANSFORMERS
+
+        formula_opts = base_model.model_copy()
+        formula_opts.prompt = "Convert formula to latex."
+        formula_opts.response_format = ResponseFormat.OTSL
+
+        code_opts = base_model.model_copy()
+        code_opts.prompt = "Convert code to text."
+        code_opts.response_format = ResponseFormat.OTSL
+
+        text_opts = base_model.model_copy()
+        text_opts.prompt = "Convert this page to docling."
+        text_opts.response_format = ResponseFormat.OTSL
+
+        table_opts = base_model.model_copy()
         table_opts.prompt = "Convert this table to OTSL."
         table_opts.response_format = ResponseFormat.OTSL
 
@@ -175,6 +190,11 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
                     labels=[DocItemLabel.FORMULA],
                     batch_size=16,
                 ),
+                "code": VlmTaskConfig(
+                    vlm_options=code_opts,
+                    labels=[DocItemLabel.CODE],
+                    batch_size=16,
+                ),
                 "text": VlmTaskConfig(
                     vlm_options=text_opts,
                     labels=[
@@ -189,7 +209,6 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
                         DocItemLabel.CHECKBOX_SELECTED,
                         DocItemLabel.CHECKBOX_UNSELECTED,
                         DocItemLabel.HANDWRITTEN_TEXT,
-                        DocItemLabel.CODE,
                         DocItemLabel.EMPTY_VALUE,
                     ],
                     batch_size=16,