mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Merge from main
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
commit
c33cc217cd
17
CHANGELOG.md
17
CHANGELOG.md
@ -1,3 +1,20 @@
|
|||||||
|
## [v2.42.0](https://github.com/docling-project/docling/releases/tag/v2.42.0) - 2025-07-18
|
||||||
|
|
||||||
|
### Feature
|
||||||
|
|
||||||
|
* Add option to control empty clusters in layout postprocessing ([#1940](https://github.com/docling-project/docling/issues/1940)) ([`a436be7`](https://github.com/docling-project/docling/commit/a436be73676101cc9461a17ae7a9ae72316a5096))
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
|
||||||
|
* Safe pipeline init, use device_map in transformers models ([#1917](https://github.com/docling-project/docling/issues/1917)) ([`cca05c4`](https://github.com/docling-project/docling/commit/cca05c45eaec154ae8470f9eb3577852d17773cd))
|
||||||
|
* Fix HTML table parser and JATS backend bugs ([#1948](https://github.com/docling-project/docling/issues/1948)) ([`e1e3053`](https://github.com/docling-project/docling/commit/e1e305369552b82d3f09f0c113ea8b54d5c90658))
|
||||||
|
* KeyError: 'fPr' when processing LaTeX fractions in DOCX files ([#1926](https://github.com/docling-project/docling/issues/1926)) ([`95e7096`](https://github.com/docling-project/docling/commit/95e70962f1d7cf1f339a88fde9c907111e194726))
|
||||||
|
* Change granite vision model URL from preview to stable version ([#1925](https://github.com/docling-project/docling/issues/1925)) ([`c5fb353`](https://github.com/docling-project/docling/commit/c5fb353f109dfe79b51c201ebb1ff33fceeae34a))
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
* Fix typos ([#1943](https://github.com/docling-project/docling/issues/1943)) ([`d6d2dbe`](https://github.com/docling-project/docling/commit/d6d2dbe2f99bd965c1bc8eec3d332d0acf731189))
|
||||||
|
|
||||||
## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10
|
## [v2.41.0](https://github.com/docling-project/docling/releases/tag/v2.41.0) - 2025-07-10
|
||||||
|
|
||||||
### Feature
|
### Feature
|
||||||
|
@ -50,9 +50,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
|||||||
from docling.utils.utils import chunkify
|
from docling.utils.utils import chunkify
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
_PIPELINE_CACHE_LOCK = threading.Lock()
|
||||||
# Module-level lock for pipeline cache
|
|
||||||
_pipeline_cache_lock = threading.Lock()
|
|
||||||
|
|
||||||
|
|
||||||
class FormatOption(BaseModel):
|
class FormatOption(BaseModel):
|
||||||
@ -322,7 +320,7 @@ class DocumentConverter:
|
|||||||
# Use a composite key to cache pipelines
|
# Use a composite key to cache pipelines
|
||||||
cache_key = (pipeline_class, options_hash)
|
cache_key = (pipeline_class, options_hash)
|
||||||
|
|
||||||
with _pipeline_cache_lock:
|
with _PIPELINE_CACHE_LOCK:
|
||||||
if cache_key not in self.initialized_pipelines:
|
if cache_key not in self.initialized_pipelines:
|
||||||
_log.info(
|
_log.info(
|
||||||
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
|
||||||
|
@ -65,6 +65,7 @@ class PictureDescriptionVlmModel(
|
|||||||
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
||||||
self.model = AutoModelForVision2Seq.from_pretrained(
|
self.model = AutoModelForVision2Seq.from_pretrained(
|
||||||
artifacts_path,
|
artifacts_path,
|
||||||
|
device_map=self.device,
|
||||||
torch_dtype=torch.bfloat16,
|
torch_dtype=torch.bfloat16,
|
||||||
_attn_implementation=(
|
_attn_implementation=(
|
||||||
"flash_attention_2"
|
"flash_attention_2"
|
||||||
@ -72,7 +73,7 @@ class PictureDescriptionVlmModel(
|
|||||||
and accelerator_options.cuda_use_flash_attention2
|
and accelerator_options.cuda_use_flash_attention2
|
||||||
else "eager"
|
else "eager"
|
||||||
),
|
),
|
||||||
).to(self.device)
|
)
|
||||||
|
|
||||||
self.provenance = f"{self.options.repo_id}"
|
self.provenance = f"{self.options.repo_id}"
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "docling"
|
name = "docling"
|
||||||
version = "2.41.0" # DO NOT EDIT, updated automatically
|
version = "2.42.0" # DO NOT EDIT, updated automatically
|
||||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
keywords = [
|
keywords = [
|
||||||
@ -70,6 +70,7 @@ dependencies = [
|
|||||||
'scipy (>=1.6.0,<2.0.0)',
|
'scipy (>=1.6.0,<2.0.0)',
|
||||||
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
|
# 'scipy (>=1.6.0,<2.0.0) ; python_version >= "3.10"',
|
||||||
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
|
# 'scipy (>=1.6.0,<1.14.0) ; python_version < "3.10"',
|
||||||
|
"accelerate>=1.0.0,<2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
|
4
uv.lock
generated
4
uv.lock
generated
@ -806,9 +806,10 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling"
|
name = "docling"
|
||||||
version = "2.41.0"
|
version = "2.42.0"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "accelerate" },
|
||||||
{ name = "beautifulsoup4" },
|
{ name = "beautifulsoup4" },
|
||||||
{ name = "certifi" },
|
{ name = "certifi" },
|
||||||
{ name = "docling-core", extra = ["chunking"] },
|
{ name = "docling-core", extra = ["chunking"] },
|
||||||
@ -902,6 +903,7 @@ examples = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "accelerate", specifier = ">=1.0.0,<2" },
|
||||||
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
||||||
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
||||||
{ name = "certifi", specifier = ">=2024.7.4" },
|
{ name = "certifi", specifier = ">=2024.7.4" },
|
||||||
|
Loading…
Reference in New Issue
Block a user