mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: Upgrade to RapidOCR 3.x (#2088)
* feat: exploring new version * DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 5815c8f81b0e5ce400332597b6795e5a97ecf775 Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * chore: autoformat DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 5815c8f81b0e5ce400332597b6795e5a97ecf775 * feat: enable configurable runtime for rapidocr and handle new result better; Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * chore: fix linter Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * chore: use new server model * chore: change default engine type to onnx * chore: tests update for new rapidocr * fix: rebase from main and fix clashes * DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 5815c8f81b0e5ce400332597b6795e5a97ecf775 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 02f9db85f562e5cdfda40c52fee55cfd4030d70a I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: a7bcb205faedb881f94a89b3bbd29cb31ccd54f0 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: a39482a98cbcff7a825c8321134732af0c65930a I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 63e9d717fa26951566b02761f3fdfc752c31f805 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: ef12a6ec1ea2846a8a8e2e776eeaa59c2a0c4dfe Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 2222d2340387f8d9d66f3ca9d8e21a0945a44e7a I, Georg Heiler 
<georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: bc6a1dc507d7f146ec4797a2d3840414f46ac64d I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 56e0d67da7c57d4b5caf8eaef8dff7056c3efd32 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 871ca21271412006c76acf3c19426140efed3d50 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 7b1b77159da729d483a581a86c7309acba1712a7 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: a792a714a43e19a91b2b782f54621c1c5efda632 Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: d1fed26323ff829b716bc667fe69532839363e45 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 346ec1cad943765f886e5d17fb0a54221124689c I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 4d0bbe5bd6e9f7261b97362ff8823af244267089 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 34a5ad53892a7064a6bf35f890d344d464c78b2f I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 9151959db3ad53535011d1cfdcf9181fdf936bb1 I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit: 8ef5536f2c098826c6c0a05190f8a80614c3f3cb Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * DCO Remediation Commit for Georg Heiler <georg.kf.heiler@gmail.com> I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:7e18637a35I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:63fb8ff599I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:0cb9444fb8I, Georg Heiler 
<georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:38940d9978I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:b6d461ac42I, Georg Heiler <georg.kf.heiler@gmail.com>, hereby add my Signed-off-by to this commit:ee55eb3408Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> --------- Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>
This commit is contained in:
@@ -99,6 +99,8 @@ class RapidOcrOptions(OcrOptions):
|
|||||||
# For more details on the following options visit
|
# For more details on the following options visit
|
||||||
# https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
|
# https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
|
||||||
|
|
||||||
|
# https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr/usage/#__tabbed_3_4
|
||||||
|
backend: Literal["onnxruntime", "openvino", "paddle", "torch"] = "onnxruntime"
|
||||||
text_score: float = 0.5 # same default as rapidocr
|
text_score: float = 0.5 # same default as rapidocr
|
||||||
|
|
||||||
use_det: Optional[bool] = None # same default as rapidocr
|
use_det: Optional[bool] = None # same default as rapidocr
|
||||||
|
|||||||
@@ -42,10 +42,10 @@ class RapidOcrModel(BaseOcrModel):
|
|||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
try:
|
try:
|
||||||
from rapidocr_onnxruntime import RapidOCR # type: ignore
|
from rapidocr import EngineType, RapidOCR # type: ignore
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"RapidOCR is not installed. Please install it via `pip install rapidocr_onnxruntime` to use this OCR engine. "
|
"RapidOCR is not installed. Please install it via `pip install rapidocr onnxruntime` to use this OCR engine. "
|
||||||
"Alternatively, Docling has support for other OCR engines. See the documentation."
|
"Alternatively, Docling has support for other OCR engines. See the documentation."
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -54,21 +54,39 @@ class RapidOcrModel(BaseOcrModel):
|
|||||||
use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
|
use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
|
||||||
use_dml = accelerator_options.device == AcceleratorDevice.AUTO
|
use_dml = accelerator_options.device == AcceleratorDevice.AUTO
|
||||||
intra_op_num_threads = accelerator_options.num_threads
|
intra_op_num_threads = accelerator_options.num_threads
|
||||||
|
_ALIASES = {
|
||||||
|
"onnxruntime": EngineType.ONNXRUNTIME,
|
||||||
|
"openvino": EngineType.OPENVINO,
|
||||||
|
"paddle": EngineType.PADDLE,
|
||||||
|
"torch": EngineType.TORCH,
|
||||||
|
}
|
||||||
|
backend_enum = _ALIASES.get(self.options.backend, EngineType.ONNXRUNTIME)
|
||||||
|
|
||||||
self.reader = RapidOCR(
|
self.reader = RapidOCR(
|
||||||
text_score=self.options.text_score,
|
params={
|
||||||
cls_use_cuda=use_cuda,
|
# Global settings (these are still correct)
|
||||||
rec_use_cuda=use_cuda,
|
"Global.text_score": self.options.text_score,
|
||||||
det_use_cuda=use_cuda,
|
# "Global.verbose": self.options.print_verbose,
|
||||||
det_use_dml=use_dml,
|
# Detection model settings
|
||||||
cls_use_dml=use_dml,
|
"Det.model_path": self.options.det_model_path,
|
||||||
rec_use_dml=use_dml,
|
"Det.use_cuda": use_cuda,
|
||||||
intra_op_num_threads=intra_op_num_threads,
|
"Det.use_dml": use_dml,
|
||||||
print_verbose=self.options.print_verbose,
|
"Det.intra_op_num_threads": intra_op_num_threads,
|
||||||
det_model_path=self.options.det_model_path,
|
# Classification model settings
|
||||||
cls_model_path=self.options.cls_model_path,
|
"Cls.model_path": self.options.cls_model_path,
|
||||||
rec_model_path=self.options.rec_model_path,
|
"Cls.use_cuda": use_cuda,
|
||||||
rec_keys_path=self.options.rec_keys_path,
|
"Cls.use_dml": use_dml,
|
||||||
|
"Cls.intra_op_num_threads": intra_op_num_threads,
|
||||||
|
# Recognition model settings
|
||||||
|
"Rec.model_path": self.options.rec_model_path,
|
||||||
|
"Rec.keys_path": self.options.rec_keys_path,
|
||||||
|
"Rec.use_cuda": use_cuda,
|
||||||
|
"Rec.use_dml": use_dml,
|
||||||
|
"Rec.intra_op_num_threads": intra_op_num_threads,
|
||||||
|
"Det.engine_type": backend_enum,
|
||||||
|
"Cls.engine_type": backend_enum,
|
||||||
|
"Rec.engine_type": backend_enum,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
@@ -95,12 +113,15 @@ class RapidOcrModel(BaseOcrModel):
|
|||||||
scale=self.scale, cropbox=ocr_rect
|
scale=self.scale, cropbox=ocr_rect
|
||||||
)
|
)
|
||||||
im = numpy.array(high_res_image)
|
im = numpy.array(high_res_image)
|
||||||
result, _ = self.reader(
|
result = self.reader(
|
||||||
im,
|
im,
|
||||||
use_det=self.options.use_det,
|
use_det=self.options.use_det,
|
||||||
use_cls=self.options.use_cls,
|
use_cls=self.options.use_cls,
|
||||||
use_rec=self.options.use_rec,
|
use_rec=self.options.use_rec,
|
||||||
)
|
)
|
||||||
|
result = list(
|
||||||
|
zip(result.boxes.tolist(), result.txts, result.scores)
|
||||||
|
)
|
||||||
|
|
||||||
del high_res_image
|
del high_res_image
|
||||||
del im
|
del im
|
||||||
|
|||||||
10
docs/examples/rapidocr_with_custom_models.py
vendored
10
docs/examples/rapidocr_with_custom_models.py
vendored
@@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from huggingface_hub import snapshot_download
|
from modelscope import snapshot_download
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
|
||||||
from docling.document_converter import (
|
from docling.document_converter import (
|
||||||
@@ -17,17 +17,17 @@ def main():
|
|||||||
|
|
||||||
# Download RapidOCR models from HuggingFace
|
# Download RapidOCR models from ModelScope
|
||||||
print("Downloading RapidOCR models")
|
print("Downloading RapidOCR models")
|
||||||
download_path = snapshot_download(repo_id="SWHL/RapidOCR")
|
download_path = snapshot_download(repo_id="RapidAI/RapidOCR")
|
||||||
|
|
||||||
# Setup RapidOcrOptions for english detection
|
# Setup RapidOcrOptions for english detection
|
||||||
det_model_path = os.path.join(
|
det_model_path = os.path.join(
|
||||||
download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx"
|
download_path, "onnx", "PP-OCRv5", "det", "ch_PP-OCRv5_server_det.onnx"
|
||||||
)
|
)
|
||||||
rec_model_path = os.path.join(
|
rec_model_path = os.path.join(
|
||||||
download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx"
|
download_path, "onnx", "PP-OCRv5", "rec", "ch_PP-OCRv5_rec_server_infer.onnx"
|
||||||
)
|
)
|
||||||
cls_model_path = os.path.join(
|
cls_model_path = os.path.join(
|
||||||
download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx"
|
download_path, "onnx", "PP-OCRv4", "cls", "ch_ppocr_mobile_v2.0_cls_infer.onnx"
|
||||||
)
|
)
|
||||||
ocr_options = RapidOcrOptions(
|
ocr_options = RapidOcrOptions(
|
||||||
det_model_path=det_model_path,
|
det_model_path=det_model_path,
|
||||||
|
|||||||
2
docs/installation/index.md
vendored
2
docs/installation/index.md
vendored
@@ -31,7 +31,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
|
|||||||
| Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` |
|
| Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` |
|
||||||
| Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` |
|
| Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` |
|
||||||
| OcrMac | System dependency. See description below. | `OcrMacOptions` |
|
| OcrMac | System dependency. See description below. | `OcrMacOptions` |
|
||||||
| [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in Default Docling installation can be installed via `pip install rapidocr_onnxruntime` | `RapidOcrOptions` |
|
| [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in the default Docling installation; can be installed via `pip install rapidocr onnxruntime`. | `RapidOcrOptions` |
|
||||||
| [OnnxTR](https://github.com/felixdittrich92/OnnxTR) | Can be installed via the plugin system `pip install "docling-ocr-onnxtr[cpu]"`. Please take a look at [docling-OCR-OnnxTR](https://github.com/felixdittrich92/docling-OCR-OnnxTR).| `OnnxtrOcrOptions` |
|
| [OnnxTR](https://github.com/felixdittrich92/OnnxTR) | Can be installed via the plugin system `pip install "docling-ocr-onnxtr[cpu]"`. Please take a look at [docling-OCR-OnnxTR](https://github.com/felixdittrich92/docling-OCR-OnnxTR).| `OnnxtrOcrOptions` |
|
||||||
|
|
||||||
The Docling `DocumentConverter` lets you choose the OCR engine via the `ocr_options` setting. For example
|
The Docling `DocumentConverter` lets you choose the OCR engine via the `ocr_options` setting. For example
|
||||||
|
|||||||
@@ -96,8 +96,9 @@ vlm = [
|
|||||||
'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
|
'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
|
||||||
]
|
]
|
||||||
rapidocr = [
|
rapidocr = [
|
||||||
'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
|
'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
|
||||||
'onnxruntime (>=1.7.0,<2.0.0)',
|
'onnxruntime (>=1.7.0,<2.0.0)',
|
||||||
|
"modelscope>=1.29.0",
|
||||||
# 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
# 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
|
||||||
# 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
|
# 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -55,8 +55,8 @@ def test_e2e_webp_conversions():
|
|||||||
TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]),
|
TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]),
|
||||||
]
|
]
|
||||||
|
|
||||||
# rapidocr is only available for Python >=3.6,<3.13
|
# rapidocr is only available for Python >=3.6,<3.14
|
||||||
if sys.version_info < (3, 13):
|
if sys.version_info < (3, 14):
|
||||||
engines.append(RapidOcrOptions())
|
engines.append(RapidOcrOptions())
|
||||||
engines.append(RapidOcrOptions(force_full_page_ocr=True))
|
engines.append(RapidOcrOptions(force_full_page_ocr=True))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user