diff --git a/docling/backend/docling_parse_v2_backend.py b/docling/backend/docling_parse_v2_backend.py index 93f33466..12d7df55 100644 --- a/docling/backend/docling_parse_v2_backend.py +++ b/docling/backend/docling_parse_v2_backend.py @@ -210,12 +210,14 @@ class DoclingParseV2DocumentBackend(PdfDocumentBackend): self.parser = pdf_parser_v2("fatal") success = False - if isinstance(path_or_stream, BytesIO): + if isinstance(self.path_or_stream, BytesIO): success = self.parser.load_document_from_bytesio( - self.document_hash, path_or_stream + self.document_hash, self.path_or_stream + ) + elif isinstance(self.path_or_stream, Path): + success = self.parser.load_document( + self.document_hash, str(self.path_or_stream) ) - elif isinstance(path_or_stream, Path): - success = self.parser.load_document(self.document_hash, str(path_or_stream)) if not success: raise RuntimeError( diff --git a/docling/cli/main.py b/docling/cli/main.py index ee18be97..7ca7adf2 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -208,7 +208,7 @@ def convert( ] = None, pdf_backend: Annotated[ PdfBackend, typer.Option(..., help="The PDF backend to use.") - ] = PdfBackend.DLPARSE_V1, + ] = PdfBackend.DLPARSE_V2, table_mode: Annotated[ TableFormerMode, typer.Option(..., help="The mode to use in the table structure model."), diff --git a/tests/test_legacy_format_transform.py b/tests/test_legacy_format_transform.py index 90bdaf51..28800edd 100644 --- a/tests/test_legacy_format_transform.py +++ b/tests/test_legacy_format_transform.py @@ -15,7 +15,7 @@ def test_doc_paths(): Path("tests/data/docx/word_sample.docx"), Path("tests/data/docx/lorem_ipsum.docx"), Path("tests/data/pptx/powerpoint_sample.pptx"), - # Path("tests/data/2305.03393v1-pg9-img.png"), + Path("tests/data/2305.03393v1-pg9-img.png"), Path("tests/data/2206.01062.pdf"), ]