feat: introducing docling_backend (#26)

Uses our own docling_parse to reliably get PDF cells. To get page images, this backend uses pypdfium2. Signed-off-by: Maxim Lysak <mly@zurich.ibm.com> Co-authored-by: Maxim Lysak <mly@zurich.ibm.com>
2025-12-09 05:08:14 +00:00 · 2024-08-07 16:22:36 +02:00
parent 62ba4aaf31
commit b8f5e38a8c
4 changed files with 203 additions and 6 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ pydantic-settings = "^2.3.0"
 huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = { version = "^1.7", optional = true }
+docling-parse = "^0.0.1"

 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "^24.4.2"}