mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
Fix DP2 backend code, change CLI default backend
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
3b1e1707bb
commit
8323997737
@ -210,12 +210,14 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend):
|
|||||||
self.parser = pdf_parser_v2("fatal")
|
self.parser = pdf_parser_v2("fatal")
|
||||||
|
|
||||||
success = False
|
success = False
|
||||||
if isinstance(path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
success = self.parser.load_document_from_bytesio(
|
success = self.parser.load_document_from_bytesio(
|
||||||
self.document_hash, path_or_stream
|
self.document_hash, self.path_or_stream
|
||||||
|
)
|
||||||
|
elif isinstance(self.path_or_stream, Path):
|
||||||
|
success = self.parser.load_document(
|
||||||
|
self.document_hash, str(self.path_or_stream)
|
||||||
)
|
)
|
||||||
elif isinstance(path_or_stream, Path):
|
|
||||||
success = self.parser.load_document(self.document_hash, str(path_or_stream))
|
|
||||||
|
|
||||||
if not success:
|
if not success:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
|
@ -208,7 +208,7 @@ def convert(
|
|||||||
] = None,
|
] = None,
|
||||||
pdf_backend: Annotated[
|
pdf_backend: Annotated[
|
||||||
PdfBackend, typer.Option(..., help="The PDF backend to use.")
|
PdfBackend, typer.Option(..., help="The PDF backend to use.")
|
||||||
] = PdfBackend.DLPARSE_V1,
|
] = PdfBackend.DLPARSE_V2,
|
||||||
table_mode: Annotated[
|
table_mode: Annotated[
|
||||||
TableFormerMode,
|
TableFormerMode,
|
||||||
typer.Option(..., help="The mode to use in the table structure model."),
|
typer.Option(..., help="The mode to use in the table structure model."),
|
||||||
|
@ -15,7 +15,7 @@ def test_doc_paths():
|
|||||||
Path("tests/data/docx/word_sample.docx"),
|
Path("tests/data/docx/word_sample.docx"),
|
||||||
Path("tests/data/docx/lorem_ipsum.docx"),
|
Path("tests/data/docx/lorem_ipsum.docx"),
|
||||||
Path("tests/data/pptx/powerpoint_sample.pptx"),
|
Path("tests/data/pptx/powerpoint_sample.pptx"),
|
||||||
# Path("tests/data/2305.03393v1-pg9-img.png"),
|
Path("tests/data/2305.03393v1-pg9-img.png"),
|
||||||
Path("tests/data/2206.01062.pdf"),
|
Path("tests/data/2206.01062.pdf"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user