feat: Introduce the force-ocr command-line parameter in the docling CLI. Add the full_page_ocr.py example to the mkdocs navigation

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
Nikos Livathinos 2024-11-11 14:10:56 +01:00
parent 1963e7145b
commit 7234dc3a42
3 changed files with 11 additions and 4 deletions

View File

@@ -153,6 +153,13 @@ def convert(
..., help="If enabled, the bitmap content will be processed using OCR." ..., help="If enabled, the bitmap content will be processed using OCR."
), ),
] = True, ] = True,
force_ocr: Annotated[
bool,
typer.Option(
...,
help="Replace any existing text with OCR generated text over the full content.",
),
] = False,
ocr_engine: Annotated[ ocr_engine: Annotated[
OcrEngine, typer.Option(..., help="The OCR engine to use.") OcrEngine, typer.Option(..., help="The OCR engine to use.")
] = OcrEngine.EASYOCR, ] = OcrEngine.EASYOCR,
@@ -219,11 +226,11 @@ def convert(
match ocr_engine: match ocr_engine:
case OcrEngine.EASYOCR: case OcrEngine.EASYOCR:
ocr_options: OcrOptions = EasyOcrOptions() ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
case OcrEngine.TESSERACT_CLI: case OcrEngine.TESSERACT_CLI:
ocr_options = TesseractCliOcrOptions() ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
case OcrEngine.TESSERACT: case OcrEngine.TESSERACT:
ocr_options = TesseractOcrOptions() ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
case _: case _:
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")

View File

@@ -29,7 +29,6 @@ def main():
format_options={ format_options={
InputFormat.PDF: PdfFormatOption( InputFormat.PDF: PdfFormatOption(
pipeline_options=pipeline_options, pipeline_options=pipeline_options,
backend=DoclingParseDocumentBackend,
) )
} }
) )

View File

@@ -69,6 +69,7 @@ nav:
- "Figure enrichment": examples/develop_picture_enrichment.py - "Figure enrichment": examples/develop_picture_enrichment.py
- "Table export": examples/export_tables.py - "Table export": examples/export_tables.py
- "Multimodal export": examples/export_multimodal.py - "Multimodal export": examples/export_multimodal.py
- "Force full page OCR": examples/full_page_ocr.py
- RAG / QA: - RAG / QA:
- "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb - "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb
- "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb - "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb