Swap inference engine to LM Studio

Signed-off-by: Shkarupa Alex <shkarupa.alex@gmail.com>
Shkarupa Alex 2025-07-07 17:04:29 +03:00
parent a1df985ef4
commit 4c916d65fe


@@ -34,27 +34,10 @@ def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     return options
-#### Using Ollama
+#### Using LM Studio with OlmOcr model
-def ollama_vlm_options(model: str, prompt: str):
-    options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
-        params=dict(
-            model=model,
-        ),
-        prompt=prompt,
-        timeout=90,
-        scale=1.0,
-        response_format=ResponseFormat.MARKDOWN,
-    )
-    return options
-#### Using Ollama with OlmOcr
-def ollama_olmocr_vlm_options(model: str):
+def lms_olmocr_vlm_options(model: str):
     def _dynamic_olmocr_prompt(page: Optional[SegmentedPage]):
         if page is None:
             return (
@@ -101,7 +84,7 @@ def ollama_olmocr_vlm_options(model: str):
         )
     options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        url="http://localhost:1234/v1/chat/completions",
         params=dict(
             model=model,
         ),
@@ -114,6 +97,23 @@ def ollama_olmocr_vlm_options(model: str):
     return options
+#### Using Ollama
+def ollama_vlm_options(model: str, prompt: str):
+    options = ApiVlmOptions(
+        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        params=dict(
+            model=model,
+        ),
+        prompt=prompt,
+        timeout=90,
+        scale=1.0,
+        response_format=ResponseFormat.MARKDOWN,
+    )
+    return options
 #### Using a cloud service like IBM watsonx.ai
@@ -180,6 +180,12 @@ def main():
         format=ResponseFormat.DOCTAGS,
     )
+    # Example using the OlmOcr (dynamic prompt) model with LM Studio:
+    # (uncomment the following lines)
+    # pipeline_options.vlm_options = lms_olmocr_vlm_options(
+    #     model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
+    # )
     # Example using the Granite Vision model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_vlm_options(
@@ -195,12 +201,6 @@ def main():
     #     prompt="OCR the full page to markdown.",
     # )
-    # Example using the OlmOcr (dynamic prompt) model with Ollama:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = ollama_olmocr_vlm_options(
-    #     model="hf.co/allenai/olmOCR-7B-0225-preview",
-    # )
     # Another possibility is using online services, e.g. watsonx.ai.
     # Using requires setting the env variables WX_API_KEY and WX_PROJECT_ID.
     # (uncomment the following lines)
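
For readers landing on this commit without the full example file: the option factories above only build the request configuration for an OpenAI-compatible endpoint (LM Studio listens on port 1234 by default). A minimal usage sketch follows, showing how such options plug into a VlmPipeline-based converter. It assumes docling's public API (VlmPipelineOptions, VlmPipeline, PdfFormatOption, DocumentConverter), reuses lms_olmocr_vlm_options from this example, and uses a placeholder input path; import paths may differ between docling versions, so treat it as a sketch rather than part of this commit.

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Calls to an external (even local) API server must be explicitly enabled.
pipeline_options = VlmPipelineOptions(enable_remote_services=True)

# Point the pipeline at LM Studio via the helper defined in this example;
# the model id matches the commented example in main().
pipeline_options.vlm_options = lms_olmocr_vlm_options(
    model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
)

# Route PDF conversion through the VLM pipeline instead of the default one.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

result = converter.convert("document.pdf")  # placeholder input path
print(result.document.export_to_markdown())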