Tweak defaults

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-08-15 14:49:34 +02:00
parent 16fea9cd8b
commit 1aa522792a
2 changed files with 29 additions and 11 deletions

View File

@@ -61,16 +61,15 @@ class VllmVlmModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
         # Initialize VLLM LLM
         llm_kwargs = {
             "model": str(artifacts_path),
-            "model_impl": "transformers",
             "limit_mm_per_prompt": {"image": 1},
             "trust_remote_code": vlm_options.trust_remote_code,
+            "model_impl": "transformers",
+            "gpu_memory_utilization": 0.3,  # hardcoded for now, leaves room for ~3 different models.
         }

         # Add device-specific configurations
-        if self.device.startswith("cuda"):
-            # VLLM automatically detects GPU
-            pass
-        elif self.device == "cpu":
+        if self.device == "cpu":
             llm_kwargs["device"] = "cpu"

         # Add quantization if specified
View File

@@ -55,6 +55,7 @@ from docling.datamodel.pipeline_options_vlm_model import (
 from docling.datamodel.settings import settings
 from docling.datamodel.vlm_model_specs import (
     DOLPHIN_TRANSFORMERS,
+    SMOLDOCLING_MLX,
     SMOLDOCLING_TRANSFORMERS,
 )
 from docling.models.layout_model import LayoutModel
@@ -152,13 +153,27 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
         """Create default pipeline options with custom VLM configurations from example."""
         # Configure VLM options based on the custom pipeline example
-        formula_opts = DOLPHIN_TRANSFORMERS.model_copy()
-        formula_opts.prompt = "<s>Read text in the image. <Answer/>"
-        text_opts = DOLPHIN_TRANSFORMERS.model_copy()
-        text_opts.prompt = "<s>Read text in the image. <Answer/>"
-        table_opts = SMOLDOCLING_TRANSFORMERS.model_copy()
+        # formula_opts = DOLPHIN_TRANSFORMERS.model_copy()
+        # formula_opts.prompt = "<s>Read text in the image. <Answer/>"
+        # text_opts = DOLPHIN_TRANSFORMERS.model_copy()
+        # text_opts.prompt = "<s>Read text in the image. <Answer/>"
+        base_model = SMOLDOCLING_TRANSFORMERS
+        formula_opts = base_model.model_copy()
+        formula_opts.prompt = "Convert formula to latex."
+        formula_opts.response_format = ResponseFormat.OTSL
+        code_opts = base_model.model_copy()
+        code_opts.prompt = "Convert code to text."
+        code_opts.response_format = ResponseFormat.OTSL
+        text_opts = base_model.model_copy()
+        text_opts.prompt = "Convert this page to docling."
+        text_opts.response_format = ResponseFormat.OTSL
+        table_opts = base_model.model_copy()
         table_opts.prompt = "Convert this table to OTSL."
         table_opts.response_format = ResponseFormat.OTSL
@@ -175,6 +190,11 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
                 labels=[DocItemLabel.FORMULA],
                 batch_size=16,
             ),
+            "code": VlmTaskConfig(
+                vlm_options=code_opts,
+                labels=[DocItemLabel.CODE],
+                batch_size=16,
+            ),
             "text": VlmTaskConfig(
                 vlm_options=text_opts,
                 labels=[
@@ -189,7 +209,6 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
                     DocItemLabel.CHECKBOX_SELECTED,
                     DocItemLabel.CHECKBOX_UNSELECTED,
                     DocItemLabel.HANDWRITTEN_TEXT,
-                    DocItemLabel.CODE,
                     DocItemLabel.EMPTY_VALUE,
                 ],
                 batch_size=16,