Swap inference engine to LM Studio

Signed-off-by: Shkarupa Alex <shkarupa.alex@gmail.com>
Shkarupa Alex 2025-07-07 17:04:29 +03:00
parent a1df985ef4
commit 4c916d65fe


@@ -34,27 +34,10 @@ def lms_vlm_options(model: str, prompt: str, format: ResponseFormat):
     return options
-#### Using Ollama
+#### Using LM Studio with OlmOcr model
-def ollama_vlm_options(model: str, prompt: str):
-    options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
-        params=dict(
-            model=model,
-        ),
-        prompt=prompt,
-        timeout=90,
-        scale=1.0,
-        response_format=ResponseFormat.MARKDOWN,
-    )
-    return options
-#### Using Ollama with OlmOcr
-def ollama_olmocr_vlm_options(model: str):
+def lms_olmocr_vlm_options(model: str):
     def _dynamic_olmocr_prompt(page: Optional[SegmentedPage]):
         if page is None:
             return (
@@ -101,7 +84,7 @@ def ollama_olmocr_vlm_options(model: str):
         )
     options = ApiVlmOptions(
-        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        url="http://localhost:1234/v1/chat/completions",
         params=dict(
             model=model,
         ),
@@ -114,6 +97,23 @@ def ollama_olmocr_vlm_options(model: str):
     return options
+#### Using Ollama
+def ollama_vlm_options(model: str, prompt: str):
+    options = ApiVlmOptions(
+        url="http://localhost:11434/v1/chat/completions",  # the default Ollama endpoint
+        params=dict(
+            model=model,
+        ),
+        prompt=prompt,
+        timeout=90,
+        scale=1.0,
+        response_format=ResponseFormat.MARKDOWN,
+    )
+    return options
 #### Using a cloud service like IBM watsonx.ai
@@ -180,6 +180,12 @@ def main():
         format=ResponseFormat.DOCTAGS,
     )
+    # Example using the OlmOcr (dynamic prompt) model with LM Studio:
+    # (uncomment the following lines)
+    # pipeline_options.vlm_options = lms_olmocr_vlm_options(
+    #     model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
+    # )
     # Example using the Granite Vision model with LM Studio:
     # (uncomment the following lines)
     # pipeline_options.vlm_options = lms_vlm_options(
@@ -195,12 +201,6 @@ def main():
     #     prompt="OCR the full page to markdown.",
     # )
-    # Example using the OlmOcr (dynamic prompt) model with Ollama:
-    # (uncomment the following lines)
-    # pipeline_options.vlm_options = ollama_olmocr_vlm_options(
-    #     model="hf.co/allenai/olmOCR-7B-0225-preview",
-    # )
     # Another possibility is using online services, e.g. watsonx.ai.
     # Using requires setting the env variables WX_API_KEY and WX_PROJECT_ID.
     # (uncomment the following lines)
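
For readers landing on this commit without the full example file: the option factories above only build the request configuration for an OpenAI-compatible endpoint (LM Studio listens on port 1234 by default). A minimal usage sketch follows, showing how such options plug into a VlmPipeline-based converter. It assumes docling's public API (VlmPipelineOptions, VlmPipeline, PdfFormatOption, DocumentConverter), reuses lms_olmocr_vlm_options from this example, and uses a placeholder input path; import paths may differ between docling versions, so treat it as a sketch rather than part of this commit.

from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# Calls to an external (even local) API server must be explicitly enabled.
pipeline_options = VlmPipelineOptions(enable_remote_services=True)

# Point the pipeline at LM Studio via the helper defined in this example;
# the model id matches the commented example in main().
pipeline_options.vlm_options = lms_olmocr_vlm_options(
    model="hf.co/lmstudio-community/olmOCR-7B-0225-preview-GGUF",
)

# Route PDF conversion through the VLM pipeline instead of the default one.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)

result = converter.convert("document.pdf")  # placeholder input path
print(result.document.export_to_markdown())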