From 05123c934276079b5d4954bc1da1cf09f113a331 Mon Sep 17 00:00:00 2001
From: Christoph Auer
Date: Wed, 9 Jul 2025 16:49:21 +0200
Subject: [PATCH] Use device_map for transformer models

Signed-off-by: Christoph Auer
---
 docling/models/picture_description_vlm_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
index 63c0af8a..a2d2f290 100644
--- a/docling/models/picture_description_vlm_model.py
+++ b/docling/models/picture_description_vlm_model.py
@@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
         self.processor = AutoProcessor.from_pretrained(artifacts_path)
         self.model = AutoModelForVision2Seq.from_pretrained(
             artifacts_path,
+            device_map=self.device,
             torch_dtype=torch.bfloat16,
             _attn_implementation=(
                 "flash_attention_2"
@@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
                 and accelerator_options.cuda_use_flash_attention2
                 else "eager"
             ),
-        ).to(self.device)
+        )
         self.provenance = f"{self.options.repo_id}"
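
A minimal sketch of the pattern this patch switches to, assuming a standard Hugging Face transformers + accelerate environment; the repo id and device string below are placeholders, not taken from the patch. Passing device_map to from_pretrained lets the weights be placed on the target device as they are loaded, instead of loading the full module first and then moving it with .to(device).

# Minimal sketch (assumed names; device_map requires accelerate to be installed).
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor

repo_id = "HuggingFaceTB/SmolVLM-256M-Instruct"  # placeholder repo id
device = "cuda:0"  # placeholder device string

processor = AutoProcessor.from_pretrained(repo_id)

# Old pattern: load the model, then move the whole module afterwards.
# model = AutoModelForVision2Seq.from_pretrained(
#     repo_id, torch_dtype=torch.bfloat16
# ).to(device)

# New pattern (as in the patch): let from_pretrained handle device placement.
model = AutoModelForVision2Seq.from_pretrained(
    repo_id,
    device_map=device,
    torch_dtype=torch.bfloat16,
)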