From 7e18637a35c6786c90bc41b40607404f4b084b45 Mon Sep 17 00:00:00 2001
From: Georg Heiler <georg.kf.heiler@gmail.com>
Date: Mon, 18 Aug 2025 07:16:31 +0200
Subject: [PATCH] feat: migrate RapidOCR integration to the rapidocr (>=3.3) package

---
 docling/models/rapid_ocr_model.py            | 41 +++++++++++++-------
 docs/examples/rapidocr_with_custom_models.py | 10 ++---
 docs/installation/index.md                   |  2 +-
 pyproject.toml                               |  3 +-
 4 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py
index 16977e83..08e82428 100644
--- a/docling/models/rapid_ocr_model.py
+++ b/docling/models/rapid_ocr_model.py
@@ -42,10 +42,10 @@ class RapidOcrModel(BaseOcrModel):
 
         if self.enabled:
             try:
-                from rapidocr_onnxruntime import RapidOCR  # type: ignore
+                from rapidocr import RapidOCR  # type: ignore
             except ImportError:
                 raise ImportError(
-                    "RapidOCR is not installed. Please install it via `pip install rapidocr_onnxruntime` to use this OCR engine. "
+                    "RapidOCR is not installed. Please install it via `pip install rapidocr onnxruntime` to use this OCR engine. "
                     "Alternatively, Docling has support for other OCR engines. See the documentation."
                 )
 
@@ -56,19 +56,30 @@ class RapidOcrModel(BaseOcrModel):
             intra_op_num_threads = accelerator_options.num_threads
 
             self.reader = RapidOCR(
-                text_score=self.options.text_score,
-                cls_use_cuda=use_cuda,
-                rec_use_cuda=use_cuda,
-                det_use_cuda=use_cuda,
-                det_use_dml=use_dml,
-                cls_use_dml=use_dml,
-                rec_use_dml=use_dml,
-                intra_op_num_threads=intra_op_num_threads,
-                print_verbose=self.options.print_verbose,
-                det_model_path=self.options.det_model_path,
-                cls_model_path=self.options.cls_model_path,
-                rec_model_path=self.options.rec_model_path,
-                rec_keys_path=self.options.rec_keys_path,
+                params={
+                    # Global settings
+                    "Global.text_score": self.options.text_score,
+                    # TODO: print_verbose is not forwarded; new param key unconfirmed.
+
+                    # Detection model settings
+                    "Det.model_path": self.options.det_model_path,
+                    "Det.use_cuda": use_cuda,
+                    "Det.use_dml": use_dml,
+                    "Det.intra_op_num_threads": intra_op_num_threads,
+
+                    # Classification model settings
+                    "Cls.model_path": self.options.cls_model_path,
+                    "Cls.use_cuda": use_cuda,
+                    "Cls.use_dml": use_dml,
+                    "Cls.intra_op_num_threads": intra_op_num_threads,
+
+                    # Recognition model settings
+                    "Rec.model_path": self.options.rec_model_path,
+                    "Rec.keys_path": self.options.rec_keys_path,
+                    "Rec.use_cuda": use_cuda,
+                    "Rec.use_dml": use_dml,
+                    "Rec.intra_op_num_threads": intra_op_num_threads,
+                }
             )
 
     def __call__(
diff --git a/docs/examples/rapidocr_with_custom_models.py b/docs/examples/rapidocr_with_custom_models.py
index e6dd3963..db2d8874 100644
--- a/docs/examples/rapidocr_with_custom_models.py
+++ b/docs/examples/rapidocr_with_custom_models.py
@@ -1,6 +1,6 @@
 import os
 
-from huggingface_hub import snapshot_download
+from modelscope import snapshot_download
 
 from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
 from docling.document_converter import (
@@ -17,17 +17,17 @@ def main():
 
     # Download RappidOCR models from HuggingFace
     print("Downloading RapidOCR models")
-    download_path = snapshot_download(repo_id="SWHL/RapidOCR")
+    download_path = snapshot_download(repo_id="RapidAI/RapidOCR")
 
     # Setup RapidOcrOptions for english detection
     det_model_path = os.path.join(
-        download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx"
+        download_path, "onnx", "PP-OCRv4", "det", "en_PP-OCRv3_det_infer.onnx"
     )
     rec_model_path = os.path.join(
-        download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx"
+        download_path, "onnx", "PP-OCRv5", "rec", "ch_PP-OCRv5_rec_server_infer.onnx"
     )
     cls_model_path = os.path.join(
-        download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx"
+        download_path, "onnx", "PP-OCRv4", "cls", "ch_ppocr_mobile_v2.0_cls_infer.onnx"
     )
     ocr_options = RapidOcrOptions(
         det_model_path=det_model_path,
diff --git a/docs/installation/index.md b/docs/installation/index.md
index 38fba4c8..10b9811f 100644
--- a/docs/installation/index.md
+++ b/docs/installation/index.md
@@ -31,7 +31,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
     | Tesseract | System dependency. See description for Tesseract and Tesserocr below.  | `TesseractOcrOptions` |
     | Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` |
     | OcrMac | System dependency. See description below. | `OcrMacOptions` |
-    | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in Default Docling installation can be installed via `pip install rapidocr_onnxruntime` | `RapidOcrOptions` |
+    | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in Default Docling installation can be installed via `pip install rapidocr onnxruntime` | `RapidOcrOptions` |
     | [OnnxTR](https://github.com/felixdittrich92/OnnxTR) | Can be installed via the plugin system `pip install "docling-ocr-onnxtr[cpu]"`. Please take a look at [docling-OCR-OnnxTR](https://github.com/felixdittrich92/docling-OCR-OnnxTR).| `OnnxtrOcrOptions` |
 
     The Docling `DocumentConverter` allows to choose the OCR engine with the `ocr_options` settings. For example
diff --git a/pyproject.toml b/pyproject.toml
index 5302ddd3..a09d3300 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -96,8 +96,9 @@ vlm = [
   'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "linux" and platform_machine == "x86_64"',
 ]
 rapidocr = [
-  'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',
+  'rapidocr (>=3.3,<4.0.0) ; python_version < "3.14"',
   'onnxruntime (>=1.7.0,<2.0.0)',
+  'modelscope (>=1.29.0)',
   # 'onnxruntime (>=1.7.0,<2.0.0) ; python_version >= "3.10"',
   # 'onnxruntime (>=1.7.0,<1.20.0) ; python_version < "3.10"',
 ]