From f6560cf6623251be8c7e2cf20e1597cc1b7a829c Mon Sep 17 00:00:00 2001 From: felix Date: Sat, 22 Mar 2025 13:36:26 +0100 Subject: [PATCH] feat(ocr): Add OnnxTR as possible OCR engine Signed-off-by: felix --- docs/examples/full_page_ocr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 8390d5fc..9252b574 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -5,6 +5,7 @@ from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, + OnnxtrOcrOptions, PdfPipelineOptions, RapidOcrOptions, TesseractCliOcrOptions, @@ -21,8 +22,9 @@ def main(): pipeline_options.do_table_structure = True pipeline_options.table_structure_options.do_cell_matching = True - # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions(Mac only), RapidOcrOptions + # Any of the OCR options can be used: EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions (Mac only), RapidOcrOptions, OnnxtrOcrOptions # ocr_options = EasyOcrOptions(force_full_page_ocr=True) + # ocr_options = OnnxtrOcrOptions(force_full_page_ocr=True) # ocr_options = TesseractOcrOptions(force_full_page_ocr=True) # ocr_options = OcrMacOptions(force_full_page_ocr=True) # ocr_options = RapidOcrOptions(force_full_page_ocr=True)