mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
pypdfium2: just forward input to PdfDocument directly
PdfDocument() should accept strings, paths, bytes, and byte streams. If not, please file a bug report. Signed-off-by: mara004 <geisserml@gmail.com>
This commit is contained in:
parent
9f2add112f
commit
2b86acec5d
@ -201,13 +201,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
|
|||||||
class PyPdfiumDocumentBackend(PdfDocumentBackend):
|
class PyPdfiumDocumentBackend(PdfDocumentBackend):
|
||||||
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
|
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
|
||||||
super().__init__(path_or_stream)
|
super().__init__(path_or_stream)
|
||||||
|
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
||||||
if isinstance(path_or_stream, Path):
|
|
||||||
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
|
||||||
elif isinstance(path_or_stream, BytesIO):
|
|
||||||
self._pdoc = pdfium.PdfDocument(
|
|
||||||
path_or_stream
|
|
||||||
) # TODO Fix me, won't accept bytes.
|
|
||||||
|
|
||||||
def page_count(self) -> int:
|
def page_count(self) -> int:
|
||||||
return len(self._pdoc)
|
return len(self._pdoc)
|
||||||
|
Loading…
Reference in New Issue
Block a user