mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: export document pages as multimodal output (#54)
* feat: export document pages as multimodal output
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* create a single parquet output
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* add loading into HF datasets library
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* renaming
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* cleanup
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -23,7 +23,7 @@ packages = [{include = "docling"}]
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
pydantic = "^2.0.0"
|
||||
-docling-core = "^1.1.2"
|
||||
+docling-core = "^1.1.3"
|
||||
docling-ibm-models = "^1.1.3"
|
||||
deepsearch-glm = "^0.19.1"
|
||||
filetype = "^1.2.0"
|
||||
@@ -36,6 +36,7 @@ docling-parse = "^1.1.3"
|
||||
certifi = ">=2024.7.4"
|
||||
rtree = "^1.3.0"
|
||||
scipy = "^1.14.1"
|
||||
+pyarrow = "^17.0.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = {extras = ["jupyter"], version = "^24.4.2"}
|
||||
@@ -51,6 +52,10 @@ types-requests = "^2.31.0.2"
|
||||
flake8-pyproject = "^1.2.3"
|
||||
pylint = "^2.17.5"
|
||||
|
||||
|
||||
+[tool.poetry.group.examples.dependencies]
|
||||
+datasets = "^2.21.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
Reference in New Issue
Block a user