From cca05c45eaec154ae8470f9eb3577852d17773cd Mon Sep 17 00:00:00 2001
From: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Date: Fri, 18 Jul 2025 15:14:36 +0200
Subject: [PATCH] fix: Safe pipeline init, use device_map in transformers
 models (#1917)

* Use device_map for transformer models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add accelerate

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Relax accelerate min version

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Make pipeline cache+init thread-safe

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---
 docling/document_converter.py                 | 25 +++++++++++--------
 .../models/picture_description_vlm_model.py   |  3 ++-
 pyproject.toml                                |  1 +
 uv.lock                                       |  2 ++
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/docling/document_converter.py b/docling/document_converter.py
index 1a0a9d75..f3bcb89e 100644
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -1,6 +1,7 @@
 import hashlib
 import logging
 import sys
+import threading
 import time
 from collections.abc import Iterable, Iterator
 from functools import partial
@@ -49,6 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
 from docling.utils.utils import chunkify
 
 _log = logging.getLogger(__name__)
+_PIPELINE_CACHE_LOCK = threading.Lock()
 
 
 class FormatOption(BaseModel):
@@ -315,17 +317,18 @@ class DocumentConverter:
         # Use a composite key to cache pipelines
         cache_key = (pipeline_class, options_hash)
 
-        if cache_key not in self.initialized_pipelines:
-            _log.info(
-                f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
-            )
-            self.initialized_pipelines[cache_key] = pipeline_class(
-                pipeline_options=pipeline_options
-            )
-        else:
-            _log.debug(
-                f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
-            )
+        with _PIPELINE_CACHE_LOCK:
+            if cache_key not in self.initialized_pipelines:
+                _log.info(
+                    f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
+                )
+                self.initialized_pipelines[cache_key] = pipeline_class(
+                    pipeline_options=pipeline_options
+                )
+            else:
+                _log.debug(
+                    f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
+                )
 
         return self.initialized_pipelines[cache_key]
 
diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
index 63c0af8a..a2d2f290 100644
--- a/docling/models/picture_description_vlm_model.py
+++ b/docling/models/picture_description_vlm_model.py
@@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
                 self.processor = AutoProcessor.from_pretrained(artifacts_path)
                 self.model = AutoModelForVision2Seq.from_pretrained(
                     artifacts_path,
+                    device_map=self.device,
                     torch_dtype=torch.bfloat16,
                     _attn_implementation=(
                         "flash_attention_2"
@@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
                         and accelerator_options.cuda_use_flash_attention2
                         else "eager"
                     ),
-                ).to(self.device)
+                )
 
             self.provenance = f"{self.options.repo_id}"
 
diff --git a/pyproject.toml b/pyproject.toml
index 65919200..3450458d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,7 @@ dependencies = [
   'scipy (>=1.6.0,<2.0.0)',
   # 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
   # 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
+  "accelerate>=1.0.0,<2",
 ]
 
 [project.urls]
diff --git a/uv.lock b/uv.lock
index 76e71334..e3312cbb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -809,6 +809,7 @@ name = "docling"
 version = "2.41.0"
 source = { editable = "." }
 dependencies = [
+    { name = "accelerate" },
     { name = "beautifulsoup4" },
     { name = "certifi" },
     { name = "docling-core", extra = ["chunking"] },
@@ -902,6 +903,7 @@ examples = [
 
 [package.metadata]
 requires-dist = [
+    { name = "accelerate", specifier = ">=1.0.0,<2" },
     { name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
     { name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
     { name = "certifi", specifier = ">=2024.7.4" },