mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat(ocr): added support for RapidOCR engine (#415)
* adding rapidocr engine for ocr in docling Signed-off-by: swayam-singhal <swayam.singhal@inito.com> * fixing styling format Signed-off-by: Swaymaw <swaymaw@gmail.com> * updating pyproject.toml and poetry.lock to fix ci bugs Signed-off-by: Swaymaw <swaymaw@gmail.com> * help poetry pinning for python3.9 Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * simplifying rapidocr options so that device can be changed using a single option for all models Signed-off-by: Swaymaw <swaymaw@gmail.com> * fix styling issues and small bug in rapidOcrOptions Signed-off-by: Swaymaw <swaymaw@gmail.com> * use default device until we enable global management Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: swayam-singhal <swayam.singhal@inito.com> Signed-off-by: Swaymaw <swaymaw@gmail.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: swayam-singhal <swayam.singhal@inito.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -50,6 +50,13 @@ marko = "^2.1.2"
|
||||
openpyxl = "^3.1.5"
|
||||
lxml = ">=4.0.0,<6.0.0"
|
||||
ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
|
||||
rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
|
||||
onnxruntime = [
|
||||
# Python < 3.10 is capped below 1.20.0: 1.19.2 is the last release with Python 3.9 support,
|
||||
# see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
|
||||
{ version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
|
||||
# Python 3.10 and newer are not capped at 1.20.0; any release matching ^1.7.0 is accepted.
{ version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" }
|
||||
]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = {extras = ["jupyter"], version = "^24.4.2"}
|
||||
@@ -104,6 +111,7 @@ torchvision = [
|
||||
# Each extra maps a feature name to the list of dependency packages it pulls in.
[tool.poetry.extras]
|
||||
tesserocr = ["tesserocr"]
|
||||
ocrmac = ["ocrmac"]
|
||||
# The rapidocr extra lists both rapidocr-onnxruntime and onnxruntime, which are
# declared with optional = true in the dependency list.
rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]
|
||||
|
||||
[tool.poetry.scripts]
|
||||
docling = "docling.cli.main:app"
|
||||
|
||||
Reference in New Issue
Block a user