mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 22:14:37 +00:00
feat: Introduce the force-ocr cmd parameter in docling cli. Add the full_page_ocr.py example in mkdocs
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
parent
1963e7145b
commit
7234dc3a42
@@ -153,6 +153,13 @@ def convert(
|
||||
..., help="If enabled, the bitmap content will be processed using OCR."
|
||||
),
|
||||
] = True,
|
||||
force_ocr: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
...,
|
||||
help="Replace any existing text with OCR generated text over the full content.",
|
||||
),
|
||||
] = False,
|
||||
ocr_engine: Annotated[
|
||||
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
||||
] = OcrEngine.EASYOCR,
|
||||
@@ -219,11 +226,11 @@ def convert(
|
||||
|
||||
match ocr_engine:
|
||||
case OcrEngine.EASYOCR:
|
||||
ocr_options: OcrOptions = EasyOcrOptions()
|
||||
ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
|
||||
case OcrEngine.TESSERACT_CLI:
|
||||
ocr_options = TesseractCliOcrOptions()
|
||||
ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
|
||||
case OcrEngine.TESSERACT:
|
||||
ocr_options = TesseractOcrOptions()
|
||||
ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
|
||||
case _:
|
||||
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
|
||||
|
||||
|
@@ -29,7 +29,6 @@ def main():
|
||||
format_options={
|
||||
InputFormat.PDF: PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=DoclingParseDocumentBackend,
|
||||
)
|
||||
}
|
||||
)
|
||||
|
@@ -69,6 +69,7 @@ nav:
|
||||
- "Figure enrichment": examples/develop_picture_enrichment.py
|
||||
- "Table export": examples/export_tables.py
|
||||
- "Multimodal export": examples/export_multimodal.py
|
||||
- "Force full page OCR": examples/full_page_ocr.py
|
||||
- RAG / QA:
|
||||
- "RAG with LlamaIndex 🦙": examples/rag_llamaindex.ipynb
|
||||
- "RAG with LangChain 🦜🔗": examples/rag_langchain.ipynb
|
||||
|
Loading…
Reference in New Issue
Block a user