Merge from main

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-07-19 17:28:13 +02:00
commit c33cc217cd
5 changed files with 26 additions and 7 deletions

View File

@ -1,3 +1,20 @@
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
### Feature
* Add option to control empty clusters in layout postprocessing ([#1940](https://github.com/docling-project/docling/issues/1940)) ([`a436be7`](https://github.com/docling-project/docling/commit/a436be73676101cc9461a17ae7a9ae72316a5096))
### Fix
* Safe pipeline init, use device_map in transformers models ([#1917](https://github.com/docling-project/docling/issues/1917)) ([`cca05c4`](https://github.com/docling-project/docling/commit/cca05c45eaec154ae8470f9eb3577852d17773cd))
* Fix HTML table parser and JATS backend bugs ([#1948](https://github.com/docling-project/docling/issues/1948)) ([`e1e3053`](https://github.com/docling-project/docling/commit/e1e305369552b82d3f09f0c113ea8b54d5c90658))
* KeyError: 'fPr' when processing LaTeX fractions in DOCX files ([#1926](https://github.com/docling-project/docling/issues/1926)) ([`95e7096`](https://github.com/docling-project/docling/commit/95e70962f1d7cf1f339a88fde9c907111e194726))
* Change granite vision model URL from preview to stable version ([#1925](https://github.com/docling-project/docling/issues/1925)) ([`c5fb353`](https://github.com/docling-project/docling/commit/c5fb353f109dfe79b51c201ebb1ff33fceeae34a))
### Documentation
* Fix typos ([#1943](https://github.com/docling-project/docling/issues/1943)) ([`d6d2dbe`](https://github.com/docling-project/docling/commit/d6d2dbe2f99bd965c1bc8eec3d332d0acf731189))
## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10
### Feature

View File

@ -50,9 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
from docling.utils.utils import chunkify
_log = logging.getLogger(__name__)
# Module-level lock for pipeline cache
_pipeline_cache_lock = threading.Lock()
_PIPELINE_CACHE_LOCK = threading.Lock()
class FormatOption(BaseModel):
@ -322,7 +320,7 @@ class DocumentConverter:
# Use a composite key to cache pipelines
cache_key = (pipeline_class, options_hash)
with _pipeline_cache_lock:
with _PIPELINE_CACHE_LOCK:
if cache_key not in self.initialized_pipelines:
_log.info(
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"

View File

@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
self.processor = AutoProcessor.from_pretrained(artifacts_path)
self.model = AutoModelForVision2Seq.from_pretrained(
artifacts_path,
device_map=self.device,
torch_dtype=torch.bfloat16,
_attn_implementation=(
"flash_attention_2"
@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
and accelerator_options.cuda_use_flash_attention2
else "eager"
),
).to(self.device)
)
self.provenance = f"{self.options.repo_id}"

View File

@ -1,6 +1,6 @@
[project]
name = "docling"
version = "2.41.0" # DO NOT EDIT, updated automatically
version = "2.42.0" # DO NOT EDIT, updated automatically
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
license = "MIT"
keywords = [
@ -70,6 +70,7 @@ dependencies = [
'scipy (>=1.6.0,<2.0.0)',
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
"accelerate>=1.0.0,<2",
]
[project.urls]

4
uv.lock generated
View File

@ -806,9 +806,10 @@ wheels = [
[[package]]
name = "docling"
version = "2.41.0"
version = "2.42.0"
source = { editable = "." }
dependencies = [
{ name = "accelerate" },
{ name = "beautifulsoup4" },
{ name = "certifi" },
{ name = "docling-core", extra = ["chunking"] },
@ -902,6 +903,7 @@ examples = [
[package.metadata]
requires-dist = [
{ name = "accelerate", specifier = ">=1.0.0,<2" },
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
{ name = "certifi", specifier = ">=2024.7.4" },