From 7234dc3a42d2a9be41174ec49638e90671fe3486 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos Date: Mon, 11 Nov 2024 14:10:56 +0100 Subject: [PATCH] feat: Introduce the force-ocr cmd parameter in docling cli. Add the full_page_ocr.py example in mkdocs Signed-off-by: Nikos Livathinos --- docling/cli/main.py | 13 ++++++++++--- docs/examples/full_page_ocr.py | 1 - mkdocs.yml | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index 35ae01df..60a3c296 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -153,6 +153,13 @@ def convert( ..., help="If enabled, the bitmap content will be processed using OCR." ), ] = True, + force_ocr: Annotated[ + bool, + typer.Option( + ..., + help="Replace any existing text with OCR generated text over the full content.", + ), + ] = False, ocr_engine: Annotated[ OcrEngine, typer.Option(..., help="The OCR engine to use.") ] = OcrEngine.EASYOCR, @@ -219,11 +226,11 @@ def convert( match ocr_engine: case OcrEngine.EASYOCR: - ocr_options: OcrOptions = EasyOcrOptions() + ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr) case OcrEngine.TESSERACT_CLI: - ocr_options = TesseractCliOcrOptions() + ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr) case OcrEngine.TESSERACT: - ocr_options = TesseractOcrOptions() + ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr) case _: raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 0a61507e..35c2ba6b 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -29,7 +29,6 @@ def main(): format_options={ InputFormat.PDF: PdfFormatOption( pipeline_options=pipeline_options, - backend=DoclingParseDocumentBackend, ) } ) diff --git a/mkdocs.yml b/mkdocs.yml index 1fef4428..9ca6fdeb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,6 +69,7 @@ nav: - "Figure enrichment": examples/develop_picture_enrichment.py - "Table export": examples/export_tables.py - "Multimodal export": examples/export_multimodal.py + - "Force full page OCR": examples/full_page_ocr.py - RAG / QA: - "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb - "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb