feat: exploring new version

This commit is contained in:
Georg Heiler
2025-08-18 07:16:31 +02:00
parent 2aef5cf328
commit 7e18637a35
4 changed files with 34 additions and 22 deletions

View File

@@ -1,6 +1,6 @@
import os
from huggingface_hub import snapshot_download
from modelscope import snapshot_download
from docling.datamodel.pipeline_options import PdfPipelineOptions, RapidOcrOptions
from docling.document_converter import (
@@ -17,17 +17,17 @@ def main():
# Download RapidOCR models from HuggingFace
print("Downloading RapidOCR models")
download_path = snapshot_download(repo_id="SWHL/RapidOCR")
download_path = snapshot_download(repo_id="RapidAI/RapidOCR")
# Set up RapidOcrOptions for English detection
det_model_path = os.path.join(
download_path, "PP-OCRv4", "en_PP-OCRv3_det_infer.onnx"
download_path, "onnx", "PP-OCRv4","det", "en_PP-OCRv3_det_infer.onnx"
)
rec_model_path = os.path.join(
download_path, "PP-OCRv4", "ch_PP-OCRv4_rec_server_infer.onnx"
download_path, "onnx", "PP-OCRv5", "rec", "ch_PP-OCRv5_rec_server_infer.onnx"
)
cls_model_path = os.path.join(
download_path, "PP-OCRv3", "ch_ppocr_mobile_v2.0_cls_train.onnx"
download_path, "onnx", "PP-OCRv4", "cls", "ch_ppocr_mobile_v2.0_cls_infer.onnx"
)
ocr_options = RapidOcrOptions(
det_model_path=det_model_path,