Fix DoclingParse v2 (DP2) backend code, change CLI default PDF backend to DLPARSE_V2

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-12-09 12:48:30 +01:00
parent 3b1e1707bb
commit 8323997737
3 changed files with 8 additions and 6 deletions

View File

@ -210,12 +210,14 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
self.parser = pdf_parser_v2("fatal")
success = False
if isinstance(path_or_stream, BytesIO):
if isinstance(self.path_or_stream, BytesIO):
success = self.parser.load_document_from_bytesio(
self.document_hash, path_or_stream
self.document_hash, self.path_or_stream
)
elif isinstance(self.path_or_stream, Path):
success = self.parser.load_document(
self.document_hash, str(self.path_or_stream)
)
elif isinstance(path_or_stream, Path):
success = self.parser.load_document(self.document_hash, str(path_or_stream))
if not success:
raise RuntimeError(

View File

@ -208,7 +208,7 @@ def convert(
] = None,
pdf_backend: Annotated[
PdfBackend, typer.Option(..., help="The PDF backend to use.")
] = PdfBackend.DLPARSE_V1,
] = PdfBackend.DLPARSE_V2,
table_mode: Annotated[
TableFormerMode,
typer.Option(..., help="The mode to use in the table structure model."),

View File

@ -15,7 +15,7 @@ def test_doc_paths():
Path("tests/data/docx/word_sample.docx"),
Path("tests/data/docx/lorem_ipsum.docx"),
Path("tests/data/pptx/powerpoint_sample.pptx"),
# Path("tests/data/2305.03393v1-pg9-img.png"),
Path("tests/data/2305.03393v1-pg9-img.png"),
Path("tests/data/2206.01062.pdf"),
]