mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 06:08:09 +00:00
feat: update parser with bytesio interface and set as new default backend (#32)
* update parser with bytesio interface
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* change default backend
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* update DEFAULT_BACKEND
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -150,10 +150,11 @@ class DoclingParseDocumentBackend(PdfDocumentBackend):
|
||||
super().__init__(path_or_stream)
|
||||
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
||||
# Parsing cells with docling_parser call
|
||||
if isinstance(path_or_stream, BytesIO):
|
||||
raise NotImplemented("This backend does not support byte streams yet.")
|
||||
parser = pdf_parser()
|
||||
self._parser_doc = parser.find_cells(str(path_or_stream))
|
||||
if isinstance(path_or_stream, BytesIO):
|
||||
self._parser_doc = parser.find_cells_from_bytesio(path_or_stream)
|
||||
else:
|
||||
self._parser_doc = parser.find_cells(str(path_or_stream))
|
||||
|
||||
def page_count(self) -> int:
|
||||
return len(self._parser_doc["pages"])
|
||||
|
||||
Reference in New Issue
Block a user