mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: Threaded PDF pipeline (#1951)
* Initial async pdf pipeline Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * UpstreamAwareQueue Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Refactoring into async pipeline primitives and graph Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Cleanups and safety improvements Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Better threaded PDF pipeline Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pin docling-ibm-models Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove unused args Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add test Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Revise pipeline Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Unload doc backend Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Revert "Unload doc backend" This reverts commit 01066f0b6e. * Remove redundant method Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update threaded test Signed-off-by: Ubuntu <ubuntu@ip-172-31-30-253.eu-central-1.compute.internal> * Stop accumulating docs in test run Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix: don't starve on docs with > max_queue_size pages Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix: don't starve on docs with > max_queue_size pages Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * DCO Remediation Commit for Christoph Auer <cau@zurich.ibm.com> I, Christoph Auer <cau@zurich.ibm.com>, hereby add my Signed-off-by to this commit: fa71cde950 I, Ubuntu <ubuntu@ip-172-31-30-253.eu-central-1.compute.internal>, hereby add my Signed-off-by to this commit: d66da87d96 Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix: python3.9 compat Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Option to enable threadpool with doc_batch_concurrency setting Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Clean up unused code Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fix settings defaults expectations Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Use released docling-ibm-models Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove ignores for typing/linting Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Ubuntu <ubuntu@ip-172-31-30-253.eu-central-1.compute.internal> Co-authored-by: Ubuntu <ubuntu@ip-172-31-30-253.eu-central-1.compute.internal>
This commit is contained in:
@@ -46,7 +46,7 @@ dependencies = [
|
||||
'pydantic (>=2.0.0,<3.0.0)',
|
||||
'docling-core[chunking] (>=2.42.0,<3.0.0)',
|
||||
'docling-parse (>=4.0.0,<5.0.0)',
|
||||
"docling-ibm-models>=3.6.0,<4",
|
||||
"docling-ibm-models>=3.9.0,<4",
|
||||
'filetype (>=1.2.0,<2.0.0)',
|
||||
'pypdfium2 (>=4.30.0,!=4.30.1,<5.0.0)',
|
||||
'pydantic-settings (>=2.3.0,<3.0.0)',
|
||||
@@ -149,8 +149,6 @@ constraints = [
|
||||
package = true
|
||||
default-groups = "all"
|
||||
|
||||
[tool.uv.sources]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["docling*"]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user