mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
reorder sections in custom_convert
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
8ec8c38de8
commit
471daee277
@ -72,7 +72,7 @@ def main():
|
||||
# and PDF Backends for various configurations.
|
||||
# Uncomment one section at the time to see the differences in the output.
|
||||
|
||||
# PyPdfium without OCR
|
||||
# PyPdfium without EasyOCR
|
||||
# --------------------
|
||||
# pipeline_options = PipelineOptions()
|
||||
# pipeline_options.do_ocr=False
|
||||
@ -84,7 +84,7 @@ def main():
|
||||
# pdf_backend=PyPdfiumDocumentBackend,
|
||||
# )
|
||||
|
||||
# PyPdfium with OCR
|
||||
# PyPdfium with EasyOCR
|
||||
# -----------------
|
||||
# pipeline_options = PipelineOptions()
|
||||
# pipeline_options.do_ocr=True
|
||||
@ -96,7 +96,7 @@ def main():
|
||||
# pdf_backend=PyPdfiumDocumentBackend,
|
||||
# )
|
||||
|
||||
# Docling Parse without OCR
|
||||
# Docling Parse without EasyOCR
|
||||
# -------------------------
|
||||
pipeline_options = PipelineOptions()
|
||||
pipeline_options.do_ocr = False
|
||||
@ -108,7 +108,7 @@ def main():
|
||||
pdf_backend=DoclingParseDocumentBackend,
|
||||
)
|
||||
|
||||
# Docling Parse with OCR
|
||||
# Docling Parse with EasyOCR
|
||||
# ----------------------
|
||||
# pipeline_options = PipelineOptions()
|
||||
# pipeline_options.do_ocr=True
|
||||
@ -120,7 +120,15 @@ def main():
|
||||
# pdf_backend=DoclingParseDocumentBackend,
|
||||
# )
|
||||
|
||||
# Docling Parse with Tesseract OCR
|
||||
# Docling Parse with Tesseract
|
||||
# ----------------------
|
||||
pipeline_options = PipelineOptions()
|
||||
pipeline_options.do_ocr = True
|
||||
pipeline_options.do_table_structure = True
|
||||
pipeline_options.table_structure_options.do_cell_matching = True
|
||||
pipeline_options.ocr_options = TesserOcrOptions()
|
||||
|
||||
# Docling Parse with Tesseract CLI
|
||||
# ----------------------
|
||||
pipeline_options = PipelineOptions()
|
||||
pipeline_options.do_ocr = True
|
||||
@ -128,14 +136,6 @@ def main():
|
||||
pipeline_options.table_structure_options.do_cell_matching = True
|
||||
pipeline_options.ocr_options = TesseractOcrOptions()
|
||||
|
||||
# Docling Parse with TesserOCR
|
||||
# ----------------------
|
||||
# pipeline_options = PipelineOptions()
|
||||
# pipeline_options.do_ocr=True
|
||||
# pipeline_options.do_table_structure=True
|
||||
# pipeline_options.table_structure_options.do_cell_matching = True
|
||||
# pipeline_options.ocr_options = TesserOcrOptions()
|
||||
|
||||
doc_converter = DocumentConverter(
|
||||
pipeline_options=pipeline_options,
|
||||
pdf_backend=DoclingParseDocumentBackend,
|
||||
|
Loading…
Reference in New Issue
Block a user