From 7e18637a35c6786c90bc41b40607404f4b084b45 Mon Sep 17 00:00:00 2001 From: Georg Heiler Date: Mon, 18 Aug 2025 07:16:31 +0200 Subject: [PATCH] feat: explore migrating RapidOCR to the rapidocr 3.x package --- docling/models/rapid_ocr_model.py | 41 +++++++++++++------- docs/examples/rapidocr_with_custom_models.py | 10 ++--- docs/installation/index.md | 2 +- pyproject.toml | 3 +- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 16977e83..08e82428 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -42,10 +42,10 @@ class RapidOcrModel(BaseOcrModel): if self.enabled: try: - from rapidocr_onnxruntime import RapidOCR # type: ignore + from rapidocr import RapidOCR # type: ignore except ImportError: raise ImportError( - "RapidOCR is not installed. Please install it via `pip install rapidocr_onnxruntime` to use this OCR engine. " + "RapidOCR is not installed. Please install it via `pip install rapidocr onnxruntime` to use this OCR engine. " "Alternatively, Docling has support for other OCR engines. See the documentation."
) @@ -56,19 +56,30 @@ class RapidOcrModel(BaseOcrModel): intra_op_num_threads = accelerator_options.num_threads self.reader = RapidOCR( - text_score=self.options.text_score, - cls_use_cuda=use_cuda, - rec_use_cuda=use_cuda, - det_use_cuda=use_cuda, - det_use_dml=use_dml, - cls_use_dml=use_dml, - rec_use_dml=use_dml, - intra_op_num_threads=intra_op_num_threads, - print_verbose=self.options.print_verbose, - det_model_path=self.options.det_model_path, - cls_model_path=self.options.cls_model_path, - rec_model_path=self.options.rec_model_path, - rec_keys_path=self.options.rec_keys_path, + params={ + # Global settings (these are still correct) + "Global.text_score": self.options.text_score, + #"Global.verbose": self.options.print_verbose, + + # Detection model settings + "Det.model_path": self.options.det_model_path, + "Det.use_cuda": use_cuda, + "Det.use_dml": use_dml, + "Det.intra_op_num_threads": intra_op_num_threads, + + # Classification model settings + "Cls.model_path": self.options.cls_model_path, + "Cls.use_cuda": use_cuda, + "Cls.use_dml": use_dml, + "Cls.intra_op_num_threads": intra_op_num_threads, + + # Recognition model settings + "Rec.model_path": self.options.rec_model_path, + "Rec.keys_path": self.options.rec_keys_path, + "Rec.use_cuda": use_cuda, + "Rec.use_dml": use_dml, + "Rec.intra_op_num_threads": intra_op_num_threads, + } ) def __call__( diff --git a/docs/examples/rapidocr_with_custom_models.py b/docs/examples/rapidocr_with_custom_models.py index e6dd3963..db2d8874 100644 --- a/docs/examples/rapidocr_with_custom_models.py +++ b/docs/examples/rapidocr_with_custom_models.py @@ -1,6 +1,6 @@ import os -from huggingface_hub import snapshot_download +from modelscope import snapshot_download from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions from docling.document_converter import ( @@ -17,17 +17,17 @@ def main(): # Download RappidOCR models from HuggingFace print("Downloading RapidOCR models") - download_path = 
snapshot_download(repo_id="SWHL/RapidOCR") + download_path = snapshot_download(repo_id="RapidAI/RapidOCR") # Setup RapidOcrOptions for english detection det_model_path = os.path.join( - download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx" + download_path, "onnx", "PP-OCRv4","det", "en_PP-OCRv3_det_infer.onnx" ) rec_model_path = os.path.join( - download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx" + download_path, "onnx", "PP-OCRv5", "rec", "ch_PP-OCRv5_rec_server_infer.onnx" ) cls_model_path = os.path.join( - download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx" + download_path, "onnx", "PP-OCRv4", "cls", "ch_ppocr_mobile_v2.0_cls_infer.onnx" ) ocr_options = RapidOcrOptions( det_model_path=det_model_path, diff --git a/docs/installation/index.md b/docs/installation/index.md index 38fba4c8..10b9811f 100644 --- a/docs/installation/index.md +++ b/docs/installation/index.md @@ -31,7 +31,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi | Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` | | Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` | | OcrMac | System dependency. See description below. | `OcrMacOptions` | - | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in Default Docling installation can be installed via `pip install rapidocr_onnxruntime` | `RapidOcrOptions` | + | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in the default Docling installation; install it via `pip install rapidocr onnxruntime`. | `RapidOcrOptions` | | [OnnxTR](https://github.com/felixdittrich92/OnnxTR) | Can be installed via the plugin system `pip install "docling-ocr-onnxtr[cpu]"`.
Please take a look at [docling-OCR-OnnxTR](https://github.com/felixdittrich92/docling-OCR-OnnxTR).| `OnnxtrOcrOptions` | The Docling `DocumentConverter` allows to choose the OCR engine with the `ocr_options` settings. For example diff --git a/pyproject.toml b/pyproject.toml index 5302ddd3..a09d3300 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,8 +96,9 @@ vlm = [ 'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"', ] rapidocr = [ - 'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"', + 'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"', 'onnxruntime (>=1.7.0,<2.0.0)', + "modelscope>=1.29.0", # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"', # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"', ]