From edbabfcac2fd53345b1a0677e81f206285d58bae Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Mon, 8 Dec 2025 12:44:53 +0100 Subject: [PATCH] fix: add missing font download in the rapidocr artifacts (#2735) add font in the rapidocr downloads Signed-off-by: Michele Dolfi --- docling/models/rapid_ocr_model.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 2dd5feb4..21828457 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -24,7 +24,7 @@ _log = logging.getLogger(__name__) _ModelPathEngines = Literal["onnxruntime", "torch"] _ModelPathTypes = Literal[ - "det_model_path", "cls_model_path", "rec_model_path", "rec_keys_path" + "det_model_path", "cls_model_path", "rec_model_path", "rec_keys_path", "font_path" ] @@ -58,6 +58,10 @@ class RapidOcrModel(BaseOcrModel): "url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v2.0.7/paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt", "path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt", }, + "font_path": { + "url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/resources/fonts/FZYTK.TTF", + "path": "fonts/FZYTK.TTF", + }, }, "torch": { "det_model_path": { @@ -76,6 +80,10 @@ class RapidOcrModel(BaseOcrModel): "url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt", "path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt", }, + "font_path": { + "url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.4.0/resources/fonts/FZYTK.TTF", + "path": "fonts/FZYTK.TTF", + }, }, } @@ -125,6 +133,7 @@ class RapidOcrModel(BaseOcrModel): cls_model_path = self.options.cls_model_path rec_model_path = self.options.rec_model_path rec_keys_path = self.options.rec_keys_path + font_path = self.options.font_path if artifacts_path is not None: det_model_path = ( det_model_path @@ -150,12 +159,19 @@ class RapidOcrModel(BaseOcrModel): / self._model_repo_folder / self._default_models[backend_enum.value]["rec_keys_path"]["path"] ) + font_path = ( + font_path + or artifacts_path + / self._model_repo_folder + / self._default_models[backend_enum.value]["font_path"]["path"] + ) for model_path in ( rec_keys_path, cls_model_path, rec_model_path, rec_keys_path, + font_path, ): if model_path is None: continue