feat(RapidOcr): Support generic extra arguments for RapidOcr (#2266)

* feat: add support for additional parameters in RapidOcrOptions and fix RapidOcr font_path

* DCO Remediation Commit for David Morady <29502285+dmorady1@users.noreply.github.com>

I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 133d989060

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>

* fix: RapidOcr ensure backwards compatibility and add deprecation note

* add warning log for rec_font_path

* DCO Remediation Commit for David Morady <29502285+dmorady1@users.noreply.github.com>

I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 133d989060
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 0a65eed28a
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ac96f1483f

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>

* add tests for code coverage for rapidocr

* DCO Remediation Commit for David Morady <29502285+dmorady1@users.noreply.github.com>

I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 133d989060
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 0a65eed28a
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ac96f1483f
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: af5df4bb30

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>

* add small comment for test

* DCO Remediation Commit for David Morady <29502285+dmorady1@users.noreply.github.com>

I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 133d989060
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 0a65eed28a
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ac96f1483f
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: af5df4bb30
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ab893b637f

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>

* fix test comment

* DCO Remediation Commit for David Morady <29502285+dmorady1@users.noreply.github.com>

I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 133d989060
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 0a65eed28a
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ac96f1483f
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: af5df4bb30
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: ab893b637f
I, David Morady <29502285+dmorady1@users.noreply.github.com>, hereby add my Signed-off-by to this commit: 028e332aa9

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>

---------

Signed-off-by: David Morady <29502285+dmorady1@users.noreply.github.com>
This commit is contained in:
dmorady1
2025-09-16 07:26:10 +02:00
committed by GitHub
parent ad2f738231
commit 0e95171dd6
3 changed files with 52 additions and 26 deletions

View File

@@ -114,7 +114,11 @@ class RapidOcrOptions(OcrOptions):
cls_model_path: Optional[str] = None # same default as rapidocr
rec_model_path: Optional[str] = None # same default as rapidocr
rec_keys_path: Optional[str] = None # same default as rapidocr
rec_font_path: Optional[str] = None # same default as rapidocr
rec_font_path: Optional[str] = None # Deprecated, please use font_path instead
font_path: Optional[str] = None # same default as rapidocr
# Dictionary to overwrite or pass-through additional parameters
rapidocr_params: Dict[str, Any] = Field(default_factory=dict)
model_config = ConfigDict(
extra="forbid",

View File

@@ -62,32 +62,44 @@ class RapidOcrModel(BaseOcrModel):
}
backend_enum = _ALIASES.get(self.options.backend, EngineType.ONNXRUNTIME)
params = {
# Global settings (these are still correct)
"Global.text_score": self.options.text_score,
"Global.font_path": self.options.font_path,
# "Global.verbose": self.options.print_verbose,
# Detection model settings
"Det.model_path": self.options.det_model_path,
"Det.use_cuda": use_cuda,
"Det.use_dml": use_dml,
"Det.intra_op_num_threads": intra_op_num_threads,
# Classification model settings
"Cls.model_path": self.options.cls_model_path,
"Cls.use_cuda": use_cuda,
"Cls.use_dml": use_dml,
"Cls.intra_op_num_threads": intra_op_num_threads,
# Recognition model settings
"Rec.model_path": self.options.rec_model_path,
"Rec.font_path": self.options.rec_font_path,
"Rec.keys_path": self.options.rec_keys_path,
"Rec.use_cuda": use_cuda,
"Rec.use_dml": use_dml,
"Rec.intra_op_num_threads": intra_op_num_threads,
"Det.engine_type": backend_enum,
"Cls.engine_type": backend_enum,
"Rec.engine_type": backend_enum,
}
if self.options.rec_font_path is not None:
_log.warning(
"The 'rec_font_path' option for RapidOCR is deprecated. Please use 'font_path' instead."
)
user_params = self.options.rapidocr_params
if user_params:
_log.debug("Overwriting RapidOCR params with user-provided values.")
params.update(user_params)
self.reader = RapidOCR(
params={
# Global settings (these are still correct)
"Global.text_score": self.options.text_score,
# "Global.verbose": self.options.print_verbose,
# Detection model settings
"Det.model_path": self.options.det_model_path,
"Det.use_cuda": use_cuda,
"Det.use_dml": use_dml,
"Det.intra_op_num_threads": intra_op_num_threads,
# Classification model settings
"Cls.model_path": self.options.cls_model_path,
"Cls.use_cuda": use_cuda,
"Cls.use_dml": use_dml,
"Cls.intra_op_num_threads": intra_op_num_threads,
# Recognition model settings
"Rec.model_path": self.options.rec_model_path,
"Rec.font_path": self.options.rec_font_path,
"Rec.keys_path": self.options.rec_keys_path,
"Rec.use_cuda": use_cuda,
"Rec.use_dml": use_dml,
"Rec.intra_op_num_threads": intra_op_num_threads,
"Det.engine_type": backend_enum,
"Cls.engine_type": backend_enum,
"Rec.engine_type": backend_enum,
}
params=params,
)
def __call__(

View File

@@ -74,6 +74,16 @@ def test_e2e_conversions():
if sys.version_info < (3, 13):
engines.append((RapidOcrOptions(), False))
engines.append((RapidOcrOptions(force_full_page_ocr=True), False))
engines.append(
(
RapidOcrOptions(
force_full_page_ocr=True,
rec_font_path="test",
rapidocr_params={"Rec.font_path": None}, # overwrites rec_font_path
),
False,
)
)
# only works on mac
if "darwin" == sys.platform: