Fix DoclingParse V2 (DP2) backend code, change CLI default PDF backend to DLPARSE_V2

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-12-09 12:48:30 +01:00
parent 3b1e1707bb
commit 8323997737
3 changed files with 8 additions and 6 deletions

View File

@ -210,12 +210,14 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
self.parser = pdf_parser_v2("fatal") self.parser = pdf_parser_v2("fatal")
success = False success = False
if isinstance(path_or_stream, BytesIO): if isinstance(self.path_or_stream, BytesIO):
success = self.parser.load_document_from_bytesio( success = self.parser.load_document_from_bytesio(
self.document_hash, path_or_stream self.document_hash, self.path_or_stream
)
elif isinstance(self.path_or_stream, Path):
success = self.parser.load_document(
self.document_hash, str(self.path_or_stream)
) )
elif isinstance(path_or_stream, Path):
success = self.parser.load_document(self.document_hash, str(path_or_stream))
if not success: if not success:
raise RuntimeError( raise RuntimeError(

View File

@ -208,7 +208,7 @@ def convert(
] = None, ] = None,
pdf_backend: Annotated[ pdf_backend: Annotated[
PdfBackend, typer.Option(..., help="The PDF backend to use.") PdfBackend, typer.Option(..., help="The PDF backend to use.")
] = PdfBackend.DLPARSE_V1, ] = PdfBackend.DLPARSE_V2,
table_mode: Annotated[ table_mode: Annotated[
TableFormerMode, TableFormerMode,
typer.Option(..., help="The mode to use in the table structure model."), typer.Option(..., help="The mode to use in the table structure model."),

View File

@ -15,7 +15,7 @@ def test_doc_paths():
Path("tests/data/docx/word_sample.docx"), Path("tests/data/docx/word_sample.docx"),
Path("tests/data/docx/lorem_ipsum.docx"), Path("tests/data/docx/lorem_ipsum.docx"),
Path("tests/data/pptx/powerpoint_sample.pptx"), Path("tests/data/pptx/powerpoint_sample.pptx"),
# Path("tests/data/2305.03393v1-pg9-img.png"), Path("tests/data/2305.03393v1-pg9-img.png"),
Path("tests/data/2206.01062.pdf"), Path("tests/data/2206.01062.pdf"),
] ]