reformatted all

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2024-09-12 08:39:19 +02:00
parent 14ab351fdb
commit 3757c61703

View File

@ -1,11 +1,10 @@
import argparse
import json
import logging
import time
from pathlib import Path
from typing import Iterable
import argparse
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.base_models import ConversionStatus, PipelineOptions
@ -16,6 +15,7 @@ _log = logging.getLogger(__name__)
from enum import Enum
# Define an enum for the backend options
class Backend(Enum):
PDFIUM = "pdfium"
@ -68,9 +68,7 @@ def export_documents(
def main(pdf, ocr, backend):
logging.basicConfig(level=logging.INFO)
input_doc_paths = [
Path(pdf)
]
input_doc_paths = [Path(pdf)]
###########################################################################
@ -154,14 +152,18 @@ if __name__ == "__main__":
# Add arguments
parser.add_argument("--pdf", type=str, help="Path to the PDF file.")
parser.add_argument("--ocr", type=bool, default=False, help="Enable OCR (True or False).")
# Register the --ocr option (defaults to False).
# NOTE(review): argparse calls `type` on the raw command-line string, and
# bool() of any non-empty string is True, so `--ocr False` enables OCR.
# Consider action="store_true" or an explicit string-to-bool parser.
parser.add_argument(
"--ocr", type=bool, default=False, help="Enable OCR (True or False)."
)
# Add the backend option as an enum
parser.add_argument("--backend", type=lambda b: Backend[b.upper()],
choices=list(Backend), default=Backend.DOCLING,
help="Select backend (pdfium or docling). Default is docling.")
# Register the --backend option: the string is upper-cased and looked up by
# member name in the Backend enum; the converted value is then checked
# against `choices`. Defaults to Backend.DOCLING.
# NOTE(review): an unknown name makes Backend[...] raise KeyError, which
# argparse does not turn into a usage error (it only handles
# ArgumentTypeError, TypeError and ValueError) -- the user would see a
# traceback instead of the usual "invalid choice" message.
parser.add_argument(
"--backend",
type=lambda b: Backend[b.upper()],
choices=list(Backend),
default=Backend.DOCLING,
help="Select backend (pdfium or docling). Default is docling.",
)
# Parse the arguments
args = parser.parse_args()