mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: AutoOCR model selecting the best OCR model available and deprecating the usage of EasyOCR (#2391)
* add auto ocr model

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Apply suggestions from code review

  Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
  Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

* add final log warning

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* propagate default options

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* allow rapidocr models download

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove modelscope

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
This commit is contained in:
@@ -52,7 +52,8 @@ dependencies = [
|
||||
'pydantic-settings (>=2.3.0,<3.0.0)',
|
||||
'huggingface_hub (>=0.23,<1)',
|
||||
'requests (>=2.32.2,<3.0.0)',
|
||||
'easyocr (>=1.7,<2.0)',
|
||||
'ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"',
|
||||
'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
|
||||
'certifi (>=2024.7.4)',
|
||||
'rtree (>=1.3.0,<2.0.0)',
|
||||
'typer (>=0.12.5,<0.20.0)',
|
||||
@@ -88,6 +89,7 @@ docling = "docling.cli.main:app"
|
||||
docling-tools = "docling.cli.tools:app"
|
||||
|
||||
[project.optional-dependencies]
|
||||
easyocr = ['easyocr (>=1.7,<2.0)']
|
||||
tesserocr = ['tesserocr (>=2.7.1,<3.0.0)']
|
||||
ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
|
||||
vlm = [
|
||||
@@ -100,7 +102,6 @@ vlm = [
|
||||
rapidocr = [
|
||||
'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
|
||||
'onnxruntime (>=1.7.0,<2.0.0)',
|
||||
"modelscope>=1.29.0",
|
||||
# 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
||||
# 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
|
||||
]
|
||||
@@ -143,6 +144,7 @@ examples = [
|
||||
"langchain-huggingface>=0.0.3",
|
||||
"langchain-milvus~=0.1",
|
||||
"langchain-text-splitters~=0.2",
|
||||
"modelscope>=1.29.0",
|
||||
]
|
||||
constraints = [
|
||||
'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
||||
@@ -252,6 +254,7 @@ module = [
|
||||
"docling_ibm_models.*",
|
||||
"easyocr.*",
|
||||
"ocrmac.*",
|
||||
"onnxruntime.*",
|
||||
"mlx_vlm.*",
|
||||
"lxml.*",
|
||||
"huggingface_hub.*",
|
||||
|
||||
Reference in New Issue
Block a user