diff --git a/docling/cli/main.py b/docling/cli/main.py index 6119fd89..d6f51b74 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -27,10 +27,10 @@ from docling.datamodel.pipeline_options import ( OcrMacOptions, OcrOptions, PdfPipelineOptions, + RapidOcrOptions, TableFormerMode, TesseractCliOcrOptions, TesseractOcrOptions, - RapidOcrOptions ) from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 6426f754..50fb4cab 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -28,19 +28,23 @@ class OcrOptions(BaseModel): 0.05 # percentage of the area for a bitmap to processed with OCR ) + class RapidOcrOptions(OcrOptions): kind: Literal["rapidocr"] = "rapidocr" - # English and chinese are the most commly used models and have been tested with RapidOCR. - lang: List[str] = ["english", "chinese"] # However, language as a parameter is not supported by rapidocr yet and hence changing this options doesn't affect anything. + # English and Chinese are the most commonly used models and have been tested with RapidOCR. + lang: List[str] = [ + "english", + "chinese", + ] # However, language as a parameter is not supported by rapidocr yet and hence changing this option doesn't affect anything. 
# For more details on supported languages by RapidOCR visit https://rapidai.github.io/RapidOCRDocs/blog/2022/09/28/%E6%94%AF%E6%8C%81%E8%AF%86%E5%88%AB%E8%AF%AD%E8%A8%80/ # For more details on the following options visit https://rapidai.github.io/RapidOCRDocs/install_usage/api/RapidOCR/ - text_score: float = 0.5 # same default as rapidocr + text_score: float = 0.5 # same default as rapidocr - use_det: Optional[bool] = None # same default as rapidocr - use_cls: Optional[bool] = None # same default as rapidocr - use_rec: Optional[bool] = None # same default as rapidocr + use_det: Optional[bool] = None # same default as rapidocr + use_cls: Optional[bool] = None # same default as rapidocr + use_rec: Optional[bool] = None # same default as rapidocr det_use_cuda: bool = False # same default as rapidocr cls_use_cuda: bool = False # same default as rapidocr @@ -50,16 +54,17 @@ class RapidOcrOptions(OcrOptions): cls_use_dml: bool = False # same default as rapidocr rec_use_dml: bool = False # same default as rapidocr - print_verbose: bool = False # same default as rapidocr - - det_model_path: Optional[str] = None # same default as rapidocr - cls_model_path: Optional[str] = None # same default as rapidocr - rec_model_path: Optional[str] = None # same default as rapidocr + print_verbose: bool = False # same default as rapidocr + + det_model_path: Optional[str] = None # same default as rapidocr + cls_model_path: Optional[str] = None # same default as rapidocr + rec_model_path: Optional[str] = None # same default as rapidocr model_config = ConfigDict( extra="forbid", ) + class EasyOcrOptions(OcrOptions): kind: Literal["easyocr"] = "easyocr" lang: List[str] = ["fr", "de", "es", "en"] diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 47527151..1e12a106 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -23,7 +23,7 @@ class RapidOcrModel(BaseOcrModel): if self.enabled: try: - from rapidocr_onnxruntime 
import RapidOCR + from rapidocr_onnxruntime import RapidOCR # type: ignore except ImportError: raise ImportError( "RapidOCR is not installed. Please install it via `pip install rapidocr_onnxruntime` to use this OCR engine. " @@ -31,17 +31,17 @@ class RapidOcrModel(BaseOcrModel): ) self.reader = RapidOCR( - text_score = self.options.text_score, - cls_use_cuda = self.options.cls_use_cuda, - rec_use_cuda = self.options.rec_use_cuda, - det_use_cuda = self.options.det_use_cuda, - det_use_dml = self.options.det_use_dml, - cls_use_dml = self.options.cls_use_dml, - rec_use_dml = self.options.rec_use_dml, - print_verbose = self.options.print_verbose, - det_model_path = self.options.det_model_path, - cls_model_path = self.options.cls_model_path, - rec_model_path = self.options.rec_model_path, + text_score=self.options.text_score, + cls_use_cuda=self.options.cls_use_cuda, + rec_use_cuda=self.options.rec_use_cuda, + det_use_cuda=self.options.det_use_cuda, + det_use_dml=self.options.det_use_dml, + cls_use_dml=self.options.cls_use_dml, + rec_use_dml=self.options.rec_use_dml, + print_verbose=self.options.print_verbose, + det_model_path=self.options.det_model_path, + cls_model_path=self.options.cls_model_path, + rec_model_path=self.options.rec_model_path, ) def __call__( @@ -70,7 +70,12 @@ class RapidOcrModel(BaseOcrModel): scale=self.scale, cropbox=ocr_rect ) im = numpy.array(high_res_image) - result, _ = self.reader(im, use_det=self.options.use_det, use_cls=self.options.use_cls, use_rec=self.options.use_rec) + result, _ = self.reader( + im, + use_det=self.options.use_det, + use_cls=self.options.use_cls, + use_rec=self.options.use_rec, + ) del high_res_image del im @@ -101,4 +106,4 @@ class RapidOcrModel(BaseOcrModel): if settings.debug.visualize_ocr: self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects) - yield page \ No newline at end of file + yield page diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 
0bed0b90..40105a38 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -13,9 +13,9 @@ from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, PdfPipelineOptions, + RapidOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, - RapidOcrOptions ) from docling.models.base_ocr_model import BaseOcrModel from docling.models.ds_glm_model import GlmModel, GlmOptions @@ -27,10 +27,10 @@ from docling.models.page_preprocessing_model import ( PagePreprocessingModel, PagePreprocessingOptions, ) +from docling.models.rapid_ocr_model import RapidOcrModel from docling.models.table_structure_model import TableStructureModel from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel from docling.models.tesseract_ocr_model import TesseractOcrModel -from docling.models.rapid_ocr_model import RapidOcrModel from docling.pipeline.base_pipeline import PaginatedPipeline from docling.utils.profiling import ProfilingScope, TimeRecorder diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 0e3597cb..967910dc 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -6,9 +6,9 @@ from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, PdfPipelineOptions, + RapidOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, - RapidOcrOptions ) from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index 0733a230..73a943af 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -10,9 +10,9 @@ from docling.datamodel.pipeline_options import ( OcrMacOptions, OcrOptions, PdfPipelineOptions, + RapidOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, - RapidOcrOptions ) from docling.document_converter import DocumentConverter, PdfFormatOption @@ -61,7 +61,7 @@ def test_e2e_conversions(): 
EasyOcrOptions(force_full_page_ocr=True), TesseractOcrOptions(force_full_page_ocr=True), TesseractCliOcrOptions(force_full_page_ocr=True), - RapidOcrOptions(force_full_page_ocr=True) + RapidOcrOptions(force_full_page_ocr=True), ] # only works on mac