feat: exploring new version

2025-12-08 20:58:11 +00:00 · 2025-08-18 07:16:31 +02:00
parent 2aef5cf328
commit 7e18637a35
4 changed files with 34 additions and 22 deletions
--- a/docs/examples/rapidocr_with_custom_models.py
+++ b/docs/examples/rapidocr_with_custom_models.py
@@ -1,6 +1,6 @@
 import os

-from huggingface_hub import snapshot_download
+from modelscope import snapshot_download

 from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
 from docling.document_converter import (
@@ -17,17 +17,17 @@ def main():

    # Download RappidOCR models from HuggingFace
    print("Downloading RapidOCR models")
-    download_path = snapshot_download(repo_id="SWHL/RapidOCR")
+    download_path = snapshot_download(repo_id="RapidAI/RapidOCR")

    # Setup RapidOcrOptions for english detection
    det_model_path = os.path.join(
-        download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx"
+        download_path, "onnx", "PP-OCRv4","det", "en_PP-OCRv3_det_infer.onnx"
    )
    rec_model_path = os.path.join(
-        download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx"
+        download_path, "onnx", "PP-OCRv5", "rec", "ch_PP-OCRv5_rec_server_infer.onnx"
    )
    cls_model_path = os.path.join(
-        download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx"
+        download_path, "onnx", "PP-OCRv4", "cls", "ch_ppocr_mobile_v2.0_cls_infer.onnx"
    )
    ocr_options = RapidOcrOptions(
        det_model_path=det_model_path,
--- a/docs/installation/index.md
+++ b/docs/installation/index.md
@@ -31,7 +31,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
    | Tesseract | System dependency. See description for Tesseract and Tesserocr below.  | `TesseractOcrOptions` |
    | Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` |
    | OcrMac | System dependency. See description below. | `OcrMacOptions` |
-    | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in Default Docling installation can be installed via `pip install rapidocr_onnxruntime` | `RapidOcrOptions` |
+    | [RapidOCR](https://github.com/RapidAI/RapidOCR) | Extra feature not included in the default Docling installation; it can be installed via `pip install rapidocr onnxruntime`. | `RapidOcrOptions` |
    | [OnnxTR](https://github.com/felixdittrich92/OnnxTR) | Can be installed via the plugin system `pip install "docling-ocr-onnxtr[cpu]"`. Please take a look at [docling-OCR-OnnxTR](https://github.com/felixdittrich92/docling-OCR-OnnxTR).| `OnnxtrOcrOptions` |

    The Docling `DocumentConverter` allows to choose the OCR engine with the `ocr_options` settings. For example