feat: new artifacts path and CLI utility (#876)

* fix artifacts path

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add docling-models utility

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* missing formatting

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rename utility to docling-tools

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rename download methods and deprecation warnings

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* propagate artifacts path usage for ocr models

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* move function to utils

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove unused file

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update docs

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* simplify downloading specific model(s)

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>

* minor refactor

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Michele Dolfi
2025-02-06 15:46:32 +01:00
committed by GitHub
parent 722a6eb7b9
commit ed74fe2ec0
15 changed files with 467 additions and 68 deletions

View File

@@ -60,6 +60,7 @@ onnxruntime = [
{ version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" }
]
pillow = "^10.0.0"
tqdm = "^4.65.0"
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -79,6 +80,7 @@ ipykernel = "^6.29.5"
ipywidgets = "^8.1.5"
nbqa = "^1.9.0"
types-openpyxl = "^3.1.5.20241114"
types-tqdm = "^4.67.0.20241221"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.40"
@@ -123,6 +125,7 @@ rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]
[tool.poetry.scripts]
docling = "docling.cli.main:app"
docling-tools = "docling.cli.tools:app"
[build-system]
requires = ["poetry-core"]