feat: docling-parse v2 as default PDF backend (#549)

* Move to_docling_document from ds-glm to this repo

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Upgrade to ds-glm 1.0 and docling-parse 3.0

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lock

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fix docling-parse v2 (DP2) backend code, change CLI default backend

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-12-09 13:26:17 +01:00
committed by GitHub
parent 9fd2cf847a
commit aca57f0527
8 changed files with 500 additions and 177 deletions

View File

@@ -28,7 +28,7 @@ python = "^3.9"
docling-core = { version = "^2.8.0", extras = ["chunking"] }
pydantic = "^2.0.0"
docling-ibm-models = "^2.0.6"
deepsearch-glm = "^0.26.1"
deepsearch-glm = "^1.0.0"
filetype = "^1.2.0"
pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0"
@@ -36,7 +36,7 @@ huggingface_hub = ">=0.23,<1"
requests = "^2.32.3"
easyocr = "^1.7"
tesserocr = { version = "^2.7.1", optional = true }
docling-parse = "^2.0.5"
docling-parse = "^3.0.0"
certifi = ">=2024.7.4"
rtree = "^1.3.0"
scipy = "^1.6.0"