From 4c916d65fedad0c3b50dd4cd371b703cadcf704e Mon Sep 17 00:00:00 2001
From: Shkarupa Alex
Date: Mon, 7 Jul 2025 17:04:29 +0300
Subject: [PATCH] Swap inference engine to LM Studio

Signed-off-by: Shkarupa Alex
---
 docs/examples/vlm_pipeline_api_model.py | 52 ++++++++++++-------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/docs/examples/vlm_pipeline_api_model.py b/docs/examples/vlm_pipeline_api_model.py
index badcb769..690e1254 100644
--- a/docs/examples/vlm_pipeline_api_model.py
+++ b/docs/examples/vlm_pipeline_api_model.py
@@ -34,27 +34,10 @@ def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     return options
 
 
-#### Using Ollama
+#### Using LM Studio with OlmOcr model
 
 
-def ollama_vlm_options(model: str, prompt: str):
-    options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
-        params=dict(
-            model=model,
-        ),
-        prompt=prompt,
-        timeout=90,
-        scale=1.0,
-        response_format=ResponseFormat.MARKDOWN,
-    )
-    return options
-
-
-#### Using Ollama with OlmOcr
-
-
-def ollama_olmocr_vlm_options(model: str):
+def lms_olmocr_vlm_options(model: str):
     def _dynamic_olmocr_prompt(page: Optional[SegmentedPage]):
         if page is None:
             return (
@@ -101,7 +84,7 @@ def ollama_olmocr_vlm_options(model: str):
         )
 
     options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        url="http://localhost:1234/v1/chat/completions",
         params=dict(
             model=model,
         ),
@@ -114,6 +97,23 @@ def ollama_olmocr_vlm_options(model: str):
     return options
 
 
+#### Using Ollama
+
+
+def ollama_vlm_options(model: str, prompt: str):
+    options = ApiVlmOptions(
+        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        params=dict(
+            model=model,
+        ),
+        prompt=prompt,
+        timeout=90,
+        scale=1.0,
+        response_format=ResponseFormat.MARKDOWN,
+    )
+    return options
+
+
 #### Using a cloud service like IBM watsonx.ai
 
 
@@ -180,6 +180,12 @@ def main():
         format=ResponseFormat.DOCTAGS,
     )
 
+    # Example using the OlmOcr (dynamic prompt) model with LM Studio:
+    # (uncomment the following lines)
+    # pipeline_options.vlm_options = lms_olmocr_vlm_options(
+    #     model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
+    # )
+
     # Example using the Granite Vision model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_vlm_options(
@@ -195,12 +201,6 @@ def main():
     #     prompt="OCR the full page to markdown.",
     # )
 
-    # Example using the OlmOcr (dynamic prompt) model with Ollama:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = ollama_olmocr_vlm_options(
-    #     model="hf.co/allenai/olmOCR-7B-0225-preview",
-    # )
-
     # Another possibility is using online services, e.g. watsonx.ai.
     # Using requires setting the env variables WX_API_KEY and WX_PROJECT_ID.
     # (uncomment the following lines)
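
Reviewer note, not part of the patch: the sketch below illustrates how the renamed lms_olmocr_vlm_options helper is meant to be wired into the example pipeline, mirroring the commented-out block this patch adds to main(). It assumes LM Studio is serving its OpenAI-compatible API on localhost:1234 with the lmstudio-community olmOCR GGUF build loaded, that the helper from the patched vlm_pipeline_api_model.py is in scope, and that the input path is illustrative.

# Minimal sketch under the assumptions above; lms_olmocr_vlm_options is the
# helper this patch introduces and must be defined (or imported) alongside
# this snippet.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Remote services must be enabled for API-based VLMs.
pipeline_options = VlmPipelineOptions(enable_remote_services=True)

# Point the VLM pipeline at the LM Studio-hosted OlmOcr model.
pipeline_options.vlm_options = lms_olmocr_vlm_options(
    model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
)

# Run the VLM pipeline for PDF inputs with these options.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

result = converter.convert("input.pdf")  # hypothetical input file
print(result.document.export_to_markdown())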