Initialize docling PDF parser at module level

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-16 16:02:48 +02:00
parent f1932fd8c5
commit 1b1af3d68c

View File

@@ -14,6 +14,8 @@ from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSi
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
pdf_parser = pdf_parser()
class DoclingParsePageBackend(PdfPageBackend): class DoclingParsePageBackend(PdfPageBackend):
def __init__( def __init__(
@@ -190,7 +192,7 @@ class DoclingParseDocumentBackend(PdfDocumentBackend):
super().__init__(path_or_stream, document_hash) super().__init__(path_or_stream, document_hash)
self._pdoc = pdfium.PdfDocument(path_or_stream) self._pdoc = pdfium.PdfDocument(path_or_stream)
self.parser = pdf_parser() self.parser = pdf_parser
success = False success = False
if isinstance(path_or_stream, BytesIO): if isinstance(path_or_stream, BytesIO):