From 6cf9fd1008d44cae1be24ec6095d382bb241522f Mon Sep 17 00:00:00 2001 From: Benichou Date: Tue, 8 Apr 2025 11:33:52 -0400 Subject: [PATCH] fix/implementing the capture of pptx_image with the same method from docx backend by extracting the drawing blip Signed-off-by: Benichou --- docling/backend/mspowerpoint_backend.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 20ca15a7..8222d467 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -53,6 +53,10 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", }, ) + self.xpath_expr = etree.XPath(".//a:blip", namespaces={ + "a": "http://schemas.openxmlformats.org/drawingml/2006/main", + "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships", + }) try: if isinstance(self.path_or_stream, BytesIO): self.pptx_obj = Presentation(self.path_or_stream)