feat: Add adaptive OCR, factor out treatment of OCR areas and cell filtering (#38)

* Introduce adaptive OCR

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Factor out BaseOcrModel, add docling-parse backend tests, fixes

* Make easyocr default dep

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-08-20 15:28:03 +02:00
committed by GitHub
parent 47b8ad917e
commit e94d317c02
13 changed files with 285 additions and 83 deletions

View File

@@ -31,9 +31,10 @@ pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0"
huggingface_hub = ">=0.23,<1"
requests = "^2.32.3"
easyocr = { version = "^1.7", optional = true }
easyocr = { version = "^1.7"}
docling-parse = "^0.2.0"
certifi = ">=2024.7.4"
rtree = "^1.3.0"
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -50,7 +51,6 @@ flake8-pyproject = "^1.2.3"
pylint = "^2.17.5"
[tool.poetry.extras]
easyocr = ["easyocr"]
ocr = ["easyocr"]
[build-system]