Added TableFormerMode.ACCURATE as the default in the CLI

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar 2024-11-14 07:45:36 +01:00
parent 9e54a74410
commit f4fc6cfd4a

View File

@ -24,6 +24,7 @@ from docling.datamodel.pipeline_options import (
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
TableFormerMode,
)
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
@ -225,6 +226,7 @@ def convert(
do_table_structure=True,
)
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE
format_options: Dict[InputFormat, FormatOption] = {
InputFormat.PDF: PdfFormatOption(