feat: add options for choosing OCR engines (#118)

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Co-authored-by: Nikos Livathinos <nli@zurich.ibm.com>
Co-authored-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2024-10-08 19:07:08 +02:00
committed by GitHub
parent d412c363d7
commit f96ea86a00
20 changed files with 699 additions and 32 deletions

View File

@@ -46,6 +46,7 @@ pydantic-settings = "^2.3.0"
huggingface_hub = ">=0.23,<1"
requests = "^2.32.3"
easyocr = "^1.7"
tesserocr = { version = "^2.7.1", optional = true }
docling-parse = "^1.4.1"
certifi = ">=2024.7.4"
rtree = "^1.3.0"
@@ -81,6 +82,9 @@ langchain-huggingface = "^0.0.3"
langchain-milvus = "^0.1.4"
langchain-text-splitters = "^0.2.4"
# Extras: each key is an extra name, mapped to the optional dependencies it
# enables. `tesserocr` is declared with `optional = true` in the dependency
# list, so it is only pulled in when this extra is requested.
[tool.poetry.extras]
tesserocr = ["tesserocr"]
[tool.poetry.scripts]
docling = "docling.cli.main:app"