mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Initialize docling PDF parser on module level
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
f1932fd8c5
commit
1b1af3d68c
@ -14,6 +14,8 @@ from docling.datamodel.base_models import BoundingBox, Cell, CoordOrigin, PageSi
|
|||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
pdf_parser = pdf_parser()
|
||||||
|
|
||||||
|
|
||||||
class DoclingParsePageBackend(PdfPageBackend):
|
class DoclingParsePageBackend(PdfPageBackend):
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -190,7 +192,7 @@ class DoclingParseDocumentBackend(PdfDocumentBackend):
|
|||||||
super().__init__(path_or_stream, document_hash)
|
super().__init__(path_or_stream, document_hash)
|
||||||
|
|
||||||
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
self._pdoc = pdfium.PdfDocument(path_or_stream)
|
||||||
self.parser = pdf_parser()
|
self.parser = pdf_parser
|
||||||
|
|
||||||
success = False
|
success = False
|
||||||
if isinstance(path_or_stream, BytesIO):
|
if isinstance(path_or_stream, BytesIO):
|
||||||
|
Loading…
Reference in New Issue
Block a user