From cca05c45eaec154ae8470f9eb3577852d17773cd Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Fri, 18 Jul 2025 15:14:36 +0200 Subject: [PATCH] fix: Safe pipeline init, use device_map in transformers models (#1917) * Use device_map for transformer models Signed-off-by: Christoph Auer * Add accelerate Signed-off-by: Christoph Auer * Relax accelerate min version Signed-off-by: Christoph Auer * Make pipeline cache+init thread-safe Signed-off-by: Christoph Auer --------- Signed-off-by: Christoph Auer --- docling/document_converter.py | 25 +++++++++++-------- .../models/picture_description_vlm_model.py | 3 ++- pyproject.toml | 1 + uv.lock | 2 ++ 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docling/document_converter.py b/docling/document_converter.py index 1a0a9d75..f3bcb89e 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -1,6 +1,7 @@ import hashlib import logging import sys +import threading import time from collections.abc import Iterable, Iterator from functools import partial @@ -49,6 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline from docling.utils.utils import chunkify _log = logging.getLogger(__name__) +_PIPELINE_CACHE_LOCK = threading.Lock() class FormatOption(BaseModel): @@ -315,17 +317,18 @@ class DocumentConverter: # Use a composite key to cache pipelines cache_key = (pipeline_class, options_hash) - if cache_key not in self.initialized_pipelines: - _log.info( - f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}" - ) - self.initialized_pipelines[cache_key] = pipeline_class( - pipeline_options=pipeline_options - ) - else: - _log.debug( - f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}" - ) + with _PIPELINE_CACHE_LOCK: + if cache_key not in self.initialized_pipelines: + _log.info( + f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}" + ) + self.initialized_pipelines[cache_key] = pipeline_class( + pipeline_options=pipeline_options + ) + else: + _log.debug( + f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}" + ) return self.initialized_pipelines[cache_key] diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index 63c0af8a..a2d2f290 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -65,6 +65,7 @@ class PictureDescriptionVlmModel( self.processor = AutoProcessor.from_pretrained(artifacts_path) self.model = AutoModelForVision2Seq.from_pretrained( artifacts_path, + device_map=self.device, torch_dtype=torch.bfloat16, _attn_implementation=( "flash_attention_2" @@ -72,7 +73,7 @@ class PictureDescriptionVlmModel( and accelerator_options.cuda_use_flash_attention2 else "eager" ), - ).to(self.device) + ) self.provenance = f"{self.options.repo_id}" diff --git a/pyproject.toml b/pyproject.toml index 65919200..3450458d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ dependencies = [ 'scipy (>=1.6.0,<2.0.0)', # 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"', # 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"', + "accelerate>=1.0.0,<2", ] [project.urls] diff --git a/uv.lock b/uv.lock index 76e71334..e3312cbb 100644 --- a/uv.lock +++ b/uv.lock @@ -809,6 +809,7 @@ name = "docling" version = "2.41.0" source = { editable = "." } dependencies = [ + { name = "accelerate" }, { name = "beautifulsoup4" }, { name = "certifi" }, { name = "docling-core", extra = ["chunking"] }, @@ -902,6 +903,7 @@ examples = [ [package.metadata] requires-dist = [ + { name = "accelerate", specifier = ">=1.0.0,<2" }, { name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" }, { name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" }, { name = "certifi", specifier = ">=2024.7.4" },