Make pipeline cache+init thread-safe

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-07-18 11:00:48 +02:00
parent 1df31bc82f
commit ec0898b501

View File

@@ -1,6 +1,7 @@
import hashlib import hashlib
import logging import logging
import sys import sys
import threading
import time import time
from collections.abc import Iterable, Iterator from collections.abc import Iterable, Iterator
from functools import partial from functools import partial
@@ -49,6 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
from docling.utils.utils import chunkify from docling.utils.utils import chunkify
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
_PIPELINE_CACHE_LOCK = threading.Lock()
class FormatOption(BaseModel): class FormatOption(BaseModel):
@@ -315,6 +317,7 @@ class DocumentConverter:
# Use a composite key to cache pipelines # Use a composite key to cache pipelines
cache_key = (pipeline_class, options_hash) cache_key = (pipeline_class, options_hash)
with _PIPELINE_CACHE_LOCK:
if cache_key not in self.initialized_pipelines: if cache_key not in self.initialized_pipelines:
_log.info( _log.info(
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}" f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"