feat: introducing docling_backend (#26)

Uses our own docling_parse to reliably get PDF cells.
To get page images, this backend uses pypdfium2.

Signed-off-by: Maxim Lysak <mly@zurich.ibm.com>
Co-authored-by: Maxim Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maxim Lysak
2024-08-07 16:22:36 +02:00
committed by GitHub
parent 62ba4aaf31
commit b8f5e38a8c
4 changed files with 203 additions and 6 deletions

View File

@@ -32,6 +32,7 @@ pydantic-settings = "^2.3.0"
huggingface_hub = ">=0.23,<1"
requests = "^2.32.3"
easyocr = { version = "^1.7", optional = true }
docling-parse = "^0.0.1"
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"}