mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
fix: Safe pipeline init, use device_map in transformers models (#1917)
* Use device_map for transformer models
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add accelerate
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Relax accelerate min version
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Make pipeline cache+init thread-safe
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Iterable, Iterator
|
||||
from functools import partial
|
||||
@@ -49,6 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||
from docling.utils.utils import chunkify
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
_PIPELINE_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
class FormatOption(BaseModel):
|
||||
@@ -315,17 +317,18 @@ class DocumentConverter:
|
||||
# Use a composite key to cache pipelines
|
||||
cache_key = (pipeline_class, options_hash)
|
||||
|
||||
if cache_key not in self.initialized_pipelines:
|
||||
_log.info(
|
||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
self.initialized_pipelines[cache_key] = pipeline_class(
|
||||
pipeline_options=pipeline_options
|
||||
)
|
||||
else:
|
||||
_log.debug(
|
||||
f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
with _PIPELINE_CACHE_LOCK:
|
||||
if cache_key not in self.initialized_pipelines:
|
||||
_log.info(
|
||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
self.initialized_pipelines[cache_key] = pipeline_class(
|
||||
pipeline_options=pipeline_options
|
||||
)
|
||||
else:
|
||||
_log.debug(
|
||||
f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
)
|
||||
|
||||
return self.initialized_pipelines[cache_key]
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
|
||||
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
||||
self.model = AutoModelForVision2Seq.from_pretrained(
|
||||
artifacts_path,
|
||||
device_map=self.device,
|
||||
torch_dtype=torch.bfloat16,
|
||||
_attn_implementation=(
|
||||
"flash_attention_2"
|
||||
@@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
|
||||
and accelerator_options.cuda_use_flash_attention2
|
||||
else "eager"
|
||||
),
|
||||
).to(self.device)
|
||||
)
|
||||
|
||||
self.provenance = f"{self.options.repo_id}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user