diff --git a/CHANGELOG.md b/CHANGELOG.md index c01e9538..6a39a319 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,20 @@ +## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18 + +### Feature + +* Add option to control empty clusters in layout postprocessing ([#1940](https://github.com/docling-project/docling/issues/1940)) ([`a436be7`](https://github.com/docling-project/docling/commit/a436be73676101cc9461a17ae7a9ae72316a5096)) + +### Fix + +* Safe pipeline init, use device_map in transformers models ([#1917](https://github.com/docling-project/docling/issues/1917)) ([`cca05c4`](https://github.com/docling-project/docling/commit/cca05c45eaec154ae8470f9eb3577852d17773cd)) +* Fix HTML table parser and JATS backend bugs ([#1948](https://github.com/docling-project/docling/issues/1948)) ([`e1e3053`](https://github.com/docling-project/docling/commit/e1e305369552b82d3f09f0c113ea8b54d5c90658)) +* KeyError: 'fPr' when processing latex fractions in DOCX files ([#1926](https://github.com/docling-project/docling/issues/1926)) ([`95e7096`](https://github.com/docling-project/docling/commit/95e70962f1d7cf1f339a88fde9c907111e194726)) +* Change granite vision model URL from preview to stable version ([#1925](https://github.com/docling-project/docling/issues/1925)) ([`c5fb353`](https://github.com/docling-project/docling/commit/c5fb353f109dfe79b51c201ebb1ff33fceeae34a)) + +### Documentation + +* Fix typos ([#1943](https://github.com/docling-project/docling/issues/1943)) ([`d6d2dbe`](https://github.com/docling-project/docling/commit/d6d2dbe2f99bd965c1bc8eec3d332d0acf731189)) + ## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10 ### Feature diff --git a/docling/document_converter.py b/docling/document_converter.py index cc4afdb0..bbafb304 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -50,9 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline from docling.utils.utils import chunkify _log = logging.getLogger(__name__) - -# Module-level lock for pipeline cache -_pipeline_cache_lock = threading.Lock() +_PIPELINE_CACHE_LOCK = threading.Lock() class FormatOption(BaseModel): @@ -322,7 +320,7 @@ class DocumentConverter: # Use a composite key to cache pipelines cache_key = (pipeline_class, options_hash) - with _pipeline_cache_lock: + with _PIPELINE_CACHE_LOCK: if cache_key not in self.initialized_pipelines: _log.info( f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}" diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index 63c0af8a..a2d2f290 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -65,6 +65,7 @@ class PictureDescriptionVlmModel( self.processor = AutoProcessor.from_pretrained(artifacts_path) self.model = AutoModelForVision2Seq.from_pretrained( artifacts_path, + device_map=self.device, torch_dtype=torch.bfloat16, _attn_implementation=( "flash_attention_2" @@ -72,7 +73,7 @@ class PictureDescriptionVlmModel( and accelerator_options.cuda_use_flash_attention2 else "eager" ), - ).to(self.device) + ) self.provenance = f"{self.options.repo_id}" diff --git a/pyproject.toml b/pyproject.toml index cecbc55f..33537754 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "docling" -version = "2.41.0" # DO NOT EDIT, updated automatically +version = "2.42.0" # DO NOT EDIT, updated automatically description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications." license = "MIT" keywords = [ @@ -70,6 +70,7 @@ dependencies = [ 'scipy (>=1.6.0,<2.0.0)', # 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"', # 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"', + "accelerate>=1.0.0,<2", ] [project.urls] diff --git a/uv.lock b/uv.lock index ab33bbcf..2aefa7be 100644 --- a/uv.lock +++ b/uv.lock @@ -806,9 +806,10 @@ wheels = [ [[package]] name = "docling" -version = "2.41.0" +version = "2.42.0" source = { editable = "." } dependencies = [ + { name = "accelerate" }, { name = "beautifulsoup4" }, { name = "certifi" }, { name = "docling-core", extra = ["chunking"] }, @@ -902,6 +903,7 @@ examples = [ [package.metadata] requires-dist = [ + { name = "accelerate", specifier = ">=1.0.0,<2" }, { name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" }, { name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" }, { name = "certifi", specifier = ">=2024.7.4" },