diff --git a/docling/models/vlm_models_inline/hf_transformers_model.py b/docling/models/vlm_models_inline/hf_transformers_model.py index e3d99715..6a2b4d93 100644 --- a/docling/models/vlm_models_inline/hf_transformers_model.py +++ b/docling/models/vlm_models_inline/hf_transformers_model.py @@ -287,6 +287,7 @@ class HuggingFaceTransformersVlmModel(BaseVlmPageModel, HuggingFaceModelDownload return_tensors="pt", padding=True, # pad across batch for both text and vision **self.vlm_options.extra_processor_kwargs, + ) inputs = {k: v.to(self.device) for k, v in inputs.items()}