Swap inference engine to LM Studio

The OlmOcr dynamic-prompt example now targets LM Studio's
OpenAI-compatible endpoint (http://localhost:1234) instead of Ollama
(http://localhost:11434), and the helper is renamed from
ollama_olmocr_vlm_options to lms_olmocr_vlm_options. The generic Ollama
example is kept but moved below the LM Studio sections, and main() gains
a commented-out OlmOcr-with-LM-Studio invocation in place of the Ollama
one.

Signed-off-by: Shkarupa Alex <shkarupa.alex@gmail.com>
This commit is contained in:
parent a1df985ef4
commit 4c916d65fe

docs/examples/vlm_pipeline_api_model.py (vendored): 52 changed lines (26 additions, 26 deletions)
--- a/docs/examples/vlm_pipeline_api_model.py
+++ b/docs/examples/vlm_pipeline_api_model.py
@@ -34,27 +34,10 @@ def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     return options
 
 
-#### Using Ollama
+#### Using LM Studio with OlmOcr model
 
 
-def ollama_vlm_options(model: str, prompt: str):
-    options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions", # the default Ollama endpoint
-        params=dict(
-            model=model,
-        ),
-        prompt=prompt,
-        timeout=90,
-        scale=1.0,
-        response_format=ResponseFormat.MARKDOWN,
-    )
-    return options
-
-
-#### Using Ollama with OlmOcr
-
-
-def ollama_olmocr_vlm_options(model: str):
+def lms_olmocr_vlm_options(model: str):
     def _dynamic_olmocr_prompt(page: Optional[SegmentedPage]):
         if page is None:
             return (
@@ -101,7 +84,7 @@ def ollama_olmocr_vlm_options(model: str):
         )
 
     options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions", # the default Ollama endpoint
+        url="http://localhost:1234/v1/chat/completions",
         params=dict(
             model=model,
         ),
@@ -114,6 +97,23 @@ def ollama_olmocr_vlm_options(model: str):
     return options
 
 
+#### Using Ollama
+
+
+def ollama_vlm_options(model: str, prompt: str):
+    options = ApiVlmOptions(
+        url="http://localhost:11434/v1/chat/completions", # the default Ollama endpoint
+        params=dict(
+            model=model,
+        ),
+        prompt=prompt,
+        timeout=90,
+        scale=1.0,
+        response_format=ResponseFormat.MARKDOWN,
+    )
+    return options
+
+
 #### Using a cloud service like IBM watsonx.ai
 
 
@@ -180,6 +180,12 @@ def main():
         format=ResponseFormat.DOCTAGS,
     )
 
+    # Example using the OlmOcr (dynamic prompt) model with LM Studio:
+    # (uncomment the following lines)
+    # pipeline_options.vlm_options = lms_olmocr_vlm_options(
+    #     model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
+    # )
+
     # Example using the Granite Vision model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_vlm_options(
@@ -195,12 +201,6 @@ def main():
     #     prompt="OCR the full page to markdown.",
     # )
 
-    # Example using the OlmOcr (dynamic prompt) model with Ollama:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = ollama_olmocr_vlm_options(
-    #     model="hf.co/allenai/olmOCR-7B-0225-preview",
-    # )
-
     # Another possibility is using online services, e.g. watsonx.ai.
     # Using requires setting the env variables WX_API_KEY and WX_PROJECT_ID.
     # (uncomment the following lines)
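For orientation, a minimal sketch of how the renamed helper would be wired into a conversion run. It assumes the VlmPipeline plumbing already used elsewhere in docs/examples/vlm_pipeline_api_model.py, plus an LM Studio server on its default port with the model loaded; the input PDF path is illustrative, not part of the commit.

# Minimal usage sketch (assumptions: docling's VlmPipeline API as used in
# the rest of this example file; LM Studio serving on localhost:1234; the
# input PDF path is illustrative).
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions(enable_remote_services=True)
# Same model identifier as the commented-out example added to main() above.
pipeline_options.vlm_options = lms_olmocr_vlm_options(
    model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
result = converter.convert("example.pdf")  # illustrative input path
print(result.document.export_to_markdown())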
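Since the commit swaps the endpoint from Ollama's default (port 11434) to LM Studio's OpenAI-compatible server (port 1234), a quick request against the new URL confirms the server is reachable before running the pipeline. This is only a sketch: the model identifier in the payload is hypothetical and should match whatever model LM Studio actually has loaded.

import requests

# Sanity-check LM Studio's OpenAI-compatible chat endpoint on its default
# port before pointing the pipeline at it. The model name below is
# hypothetical; use the identifier LM Studio reports for the loaded model.
resp = requests.post(
    "http://localhost:1234/v1/chat/completions",
    json={
        "model": "olmocr-7b-0225-preview",  # hypothetical model id
        "messages": [{"role": "user", "content": "Reply with OK."}],
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])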