From 253cfab15edf07ef968b025b1cef9be72c9e7070 Mon Sep 17 00:00:00 2001 From: Benichou Date: Tue, 8 Apr 2025 11:33:52 -0400 Subject: [PATCH] fix/implementing the capture of pptx_image with the same method from docx backend by extracting the drawing blip Signed-off-by: Benichou --- docling/backend/mspowerpoint_backend.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 9aaf41f5..66311de9 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -46,6 +46,10 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB self.pptx_obj = None self.valid = False + self.xpath_expr = etree.XPath(".//a:blip", namespaces={ + "a": "http://schemas.openxmlformats.org/drawingml/2006/main", + "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + }) try: if isinstance(self.path_or_stream, BytesIO): self.pptx_obj = Presentation(self.path_or_stream)