mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-25 19:44:34 +00:00
Merge from main
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
commit
c33cc217cd
17
CHANGELOG.md
17
CHANGELOG.md
@@ -1,3 +1,20 @@
|
||||
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
||||
|
||||
### Feature
|
||||
|
||||
* Add option to control empty clusters in layout postprocessing ([#1940](https://github.com/docling-project/docling/issues/1940)) ([`a436be7`](https://github.com/docling-project/docling/commit/a436be73676101cc9461a17ae7a9ae72316a5096))
|
||||
|
||||
### Fix
|
||||
|
||||
* Safe pipeline init, use device_map in transformers models ([#1917](https://github.com/docling-project/docling/issues/1917)) ([`cca05c4`](https://github.com/docling-project/docling/commit/cca05c45eaec154ae8470f9eb3577852d17773cd))
|
||||
* Fix HTML table parser and JATS backend bugs ([#1948](https://github.com/docling-project/docling/issues/1948)) ([`e1e3053`](https://github.com/docling-project/docling/commit/e1e305369552b82d3f09f0c113ea8b54d5c90658))
|
||||
* KeyError: 'fPr' when processing latex fractions in DOCX files ([#1926](https://github.com/docling-project/docling/issues/1926)) ([`95e7096`](https://github.com/docling-project/docling/commit/95e70962f1d7cf1f339a88fde9c907111e194726))
|
||||
* Change granite vision model URL from preview to stable version ([#1925](https://github.com/docling-project/docling/issues/1925)) ([`c5fb353`](https://github.com/docling-project/docling/commit/c5fb353f109dfe79b51c201ebb1ff33fceeae34a))
|
||||
|
||||
### Documentation
|
||||
|
||||
* Fix typos ([#1943](https://github.com/docling-project/docling/issues/1943)) ([`d6d2dbe`](https://github.com/docling-project/docling/commit/d6d2dbe2f99bd965c1bc8eec3d332d0acf731189))
|
||||
|
||||
## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10
|
||||
|
||||
### Feature
|
||||
|
@@ -50,9 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||
from docling.utils.utils import chunkify
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
# Module-level lock for pipeline cache
|
||||
_pipeline_cache_lock = threading.Lock()
|
||||
_PIPELINE_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
class FormatOption(BaseModel):
|
||||
@@ -322,7 +320,7 @@ class DocumentConverter:
|
||||
# Use a composite key to cache pipelines
|
||||
cache_key = (pipeline_class, options_hash)
|
||||
|
||||
with _pipeline_cache_lock:
|
||||
with _PIPELINE_CACHE_LOCK:
|
||||
if cache_key not in self.initialized_pipelines:
|
||||
_log.info(
|
||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||
|
@@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
|
||||
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
||||
self.model = AutoModelForVision2Seq.from_pretrained(
|
||||
artifacts_path,
|
||||
device_map=self.device,
|
||||
torch_dtype=torch.bfloat16,
|
||||
_attn_implementation=(
|
||||
"flash_attention_2"
|
||||
@@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
|
||||
and accelerator_options.cuda_use_flash_attention2
|
||||
else "eager"
|
||||
),
|
||||
).to(self.device)
|
||||
)
|
||||
|
||||
self.provenance = f"{self.options.repo_id}"
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docling"
|
||||
version = "2.41.0" # DO NOT EDIT, updated automatically
|
||||
version = "2.42.0" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
license = "MIT"
|
||||
keywords = [
|
||||
@@ -70,6 +70,7 @@ dependencies = [
|
||||
'scipy (>=1.6.0,<2.0.0)',
|
||||
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
|
||||
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
|
||||
"accelerate>=1.0.0,<2",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
4
uv.lock
generated
4
uv.lock
generated
@@ -806,9 +806,10 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "docling"
|
||||
version = "2.41.0"
|
||||
version = "2.42.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "accelerate" },
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "certifi" },
|
||||
{ name = "docling-core", extra = ["chunking"] },
|
||||
@@ -902,6 +903,7 @@ examples = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "accelerate", specifier = ">=1.0.0,<2" },
|
||||
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
||||
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
||||
{ name = "certifi", specifier = ">=2024.7.4" },
|
||||
|
Loading…
Reference in New Issue
Block a user