feat: add more options in the CLI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-11-02 09:41:33 +01:00
parent 9d8865856d
commit af32a049d4
2 changed files with 30 additions and 7 deletions

View File

@ -5,12 +5,15 @@ import time
import warnings
from enum import Enum
from pathlib import Path
from typing import Annotated, Dict, Iterable, List, Optional
from typing import Annotated, Dict, Iterable, List, Optional, Type
import typer
from docling_core.utils.file import resolve_file_source
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
from docling.backend.pdf_backend import PdfDocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.base_models import (
ConversionStatus,
FormatToExtensions,
@ -22,6 +25,7 @@ from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrOptions,
PdfPipelineOptions,
TableFormerMode,
TesseractCliOcrOptions,
TesseractOcrOptions,
)
@ -58,9 +62,10 @@ def version_callback(value: bool):
# Define an enum for the backend options
class PdfBackend(str, Enum):
    """PDF parsing backends that can be selected from the command line.

    This is the post-commit form of the enum formerly named ``Backend``:
    the old ``DOCLING`` member is gone and the docling-parse backend is now
    offered as two explicit versions (``DLPARSE_V1`` and ``DLPARSE_V2``).

    The class mixes in ``str`` so every member compares equal to its plain
    string value; those values are the spellings exposed by the
    ``pdf_backend`` CLI option.
    """

    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"
# Define an enum for the ocr engines
@ -151,6 +156,13 @@ def convert(
ocr_engine: Annotated[
OcrEngine, typer.Option(..., help="The OCR engine to use.")
] = OcrEngine.EASYOCR,
pdf_backend: Annotated[
PdfBackend, typer.Option(..., help="The PDF backend to use.")
] = PdfBackend.DLPARSE_V1,
table_mode: Annotated[
TableFormerMode,
typer.Option(..., help="The mode to use in the table structure model."),
] = TableFormerMode.FAST,
abort_on_error: Annotated[
bool,
typer.Option(
@ -217,11 +229,22 @@ def convert(
do_table_structure=True,
)
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
pipeline_options.table_structure_options.mode = table_mode
match pdf_backend:
case PdfBackend.DLPARSE_V1:
backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
case PdfBackend.DLPARSE_V2:
backend = DoclingParseV2DocumentBackend
case PdfBackend.PYPDFIUM2:
backend = PyPdfiumDocumentBackend
case _:
raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
format_options: Dict[InputFormat, FormatOption] = {
InputFormat.PDF: PdfFormatOption(
pipeline_options=pipeline_options,
backend=DoclingParseDocumentBackend, # pdf_backend
backend=backend, # pdf_backend
)
}
doc_converter = DocumentConverter(

View File

@ -1,4 +1,4 @@
from enum import Enum, auto
from enum import Enum
from pathlib import Path
from typing import List, Literal, Optional, Union
@ -6,8 +6,8 @@ from pydantic import BaseModel, ConfigDict, Field
class TableFormerMode(str, Enum):
    """Operating mode of the table structure model.

    Members carry explicit, human-readable string values instead of
    ``auto()``: ``auto()`` would produce generated counter values rather
    than these names, and the values are what a caller types when choosing
    a mode by string (for example via a command-line option).
    """

    FAST = "fast"
    ACCURATE = "accurate"
class TableStructureOptions(BaseModel):