reformatted all

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2024-09-12 08:39:19 +02:00
parent 14ab351fdb
commit 3757c61703

View File

@ -1,11 +1,10 @@
import argparse
import json import json
import logging import logging
import time import time
from pathlib import Path from pathlib import Path
from typing import Iterable from typing import Iterable
import argparse
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.base_models import ConversionStatus, PipelineOptions from docling.datamodel.base_models import ConversionStatus, PipelineOptions
@ -16,6 +15,7 @@ _log = logging.getLogger(__name__)
from enum import Enum from enum import Enum
# Define an enum for the backend options # Define an enum for the backend options
class Backend(Enum): class Backend(Enum):
PDFIUM = "pdfium" PDFIUM = "pdfium"
@ -68,9 +68,7 @@ def export_documents(
def main(pdf, ocr, backend): def main(pdf, ocr, backend):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
input_doc_paths = [ input_doc_paths = [Path(pdf)]
Path(pdf)
]
########################################################################### ###########################################################################
@ -154,14 +152,18 @@ if __name__ == "__main__":
# Add arguments # Add arguments
parser.add_argument("--pdf", type=str, help="Path to the PDF file.") parser.add_argument("--pdf", type=str, help="Path to the PDF file.")
parser.add_argument("--ocr", type=bool, default=False, help="Enable OCR (True or False).") parser.add_argument(
"--ocr", type=bool, default=False, help="Enable OCR (True or False)."
)
# Add the backend option as an enum # Add the backend option as an enum
parser.add_argument("--backend", type=lambda b: Backend[b.upper()], parser.add_argument(
choices=list(Backend), default=Backend.DOCLING, "--backend",
help="Select backend (pdfium or docling). Default is docling.") type=lambda b: Backend[b.upper()],
choices=list(Backend),
default=Backend.DOCLING,
help="Select backend (pdfium or docling). Default is docling.",
)
# Parse the arguments # Parse the arguments
args = parser.parse_args() args = parser.parse_args()