Adjust example instantiation of multi-stage VLM pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-08-18 14:36:42 +02:00
parent 3d07f1c78e
commit 4a107f4f57
3 changed files with 8 additions and 13 deletions

View File

@@ -229,7 +229,6 @@ DOLPHIN_TRANSFORMERS = InlineVlmOptions(
],
scale=2.0,
temperature=0.0,
max_new_tokens=4096,
)

View File

@@ -280,9 +280,7 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
padding=True, # pad across batch for both text and vision
# no truncation by default; match SmolDocling examples
)
inputs = {
k: (v.to(self.device) if hasattr(v, "to") else v) for k, v in inputs.items()
}
inputs = {k: v.to(self.device) for k, v in inputs.items()}
# -- Optional stopping criteria
stopping_criteria = None
@@ -302,7 +300,7 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload
"max_new_tokens": self.max_new_tokens,
"use_cache": self.use_cache,
"generation_config": self.generation_config,
"temperature": self.temperature,
# "temperature": self.temperature,
**self.vlm_options.extra_generation_config,
}
if stopping_criteria is not None:

View File

@@ -160,16 +160,14 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
smoldocling_model = SMOLDOCLING_TRANSFORMERS
text_opts = base_model.model_copy()
# text_opts.prompt = "Convert this page to docling."
text_opts.prompt = "What does it say?"
text_opts.response_format = ResponseFormat.PLAINTEXT
text_opts.max_new_tokens = 4096
text_opts.prompt = "Convert this page to docling."
text_opts.response_format = ResponseFormat.DOCTAGS
text_opts.max_new_tokens = 1024
formula_opts = base_model.model_copy()
# formula_opts.prompt = "Convert formula to latex."
formula_opts.prompt = "What does it say?"
formula_opts.response_format = ResponseFormat.PLAINTEXT
formula_opts.max_new_tokens = 4096
formula_opts.prompt = "Convert formula to latex."
formula_opts.response_format = ResponseFormat.DOCTAGS
formula_opts.max_new_tokens = 512
code_opts = smoldocling_model.model_copy()
code_opts.prompt = "Convert code to text."