diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index ecd04b8e..c923b691 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -99,6 +99,9 @@ class RapidOcrOptions(OcrOptions):
 
     # For more details on the following options visit
     # https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/
+    # https://rapidai.github.io/RapidOCRDocs/main/install_usage/rapidocr/usage/#__tabbed_3_4
+    # Inference backend applied to the Det/Cls/Rec engines.
+    backend: Literal["onnxruntime", "openvino", "paddle", "torch"] = "onnxruntime"
     text_score: float = 0.5  # same default as rapidocr
 
     use_det: Optional[bool] = None  # same default as rapidocr
diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py
index ed4835b1..038330fd 100644
--- a/docling/models/rapid_ocr_model.py
+++ b/docling/models/rapid_ocr_model.py
@@ -42,7 +42,7 @@ class RapidOcrModel(BaseOcrModel):
 
         if self.enabled:
             try:
-                from rapidocr import RapidOCR  # type: ignore
+                from rapidocr import EngineType, RapidOCR  # type: ignore
             except ImportError:
                 raise ImportError(
                     "RapidOCR is not installed. Please install it via `pip install rapidocr onnxruntime` to use this OCR engine. "
@@ -54,6 +54,15 @@ class RapidOcrModel(BaseOcrModel):
             use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
             use_dml = accelerator_options.device == AcceleratorDevice.AUTO
             intra_op_num_threads = accelerator_options.num_threads
+            # Translate the configured backend name into RapidOCR's engine enum.
+            # Unknown values fall back to ONNX Runtime, the option's default.
+            engine_by_backend = {
+                "onnxruntime": EngineType.ONNXRUNTIME,
+                "openvino": EngineType.OPENVINO,
+                "paddle": EngineType.PADDLE,
+                "torch": EngineType.TORCH,
+            }
+            backend_enum = engine_by_backend.get(self.options.backend, EngineType.ONNXRUNTIME)
 
             self.reader = RapidOCR(
                 params={
@@ -76,6 +85,9 @@ class RapidOcrModel(BaseOcrModel):
                     "Rec.use_cuda": use_cuda,
                     "Rec.use_dml": use_dml,
                     "Rec.intra_op_num_threads": intra_op_num_threads,
+                    "Det.engine_type": backend_enum,
+                    "Cls.engine_type": backend_enum,
+                    "Rec.engine_type": backend_enum,
                 }
             )
 
@@ -103,12 +115,20 @@ class RapidOcrModel(BaseOcrModel):
                             scale=self.scale, cropbox=ocr_rect
                         )
                         im = numpy.array(high_res_image)
-                        result, _ = self.reader(
+                        result = self.reader(
                             im,
                             use_det=self.options.use_det,
                             use_cls=self.options.use_cls,
                             use_rec=self.options.use_rec,
                         )
+                        # RapidOCR may return no boxes when nothing is detected;
+                        # use an empty result instead of calling `.tolist()` on None.
+                        if result is None or result.boxes is None:
+                            result = []
+                        else:
+                            result = list(
+                                zip(result.boxes.tolist(), result.txts, result.scores)
+                            )
 
                         del high_res_image
                         del im