From fbc4c4d103d0aa8fb7ccc355eca97a9f12f99e80 Mon Sep 17 00:00:00 2001 From: Benichou Date: Sun, 30 Mar 2025 15:50:24 -0400 Subject: [PATCH] Fix picture handling: only process Picture shapes that have an image attribute and whose image extension is not emf or wmf, since those extensions cause a bug when adding the picture to the doc --- docling/backend/mspowerpoint_backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index a752e8dc..e569d2c2 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -392,9 +392,10 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB self.handle_tables(shape, parent_slide, slide_ind, doc, slide_size) if shape.shape_type == MSO_SHAPE_TYPE.PICTURE: # Handle Pictures - self.handle_pictures( - shape, parent_slide, slide_ind, doc, slide_size - ) + if hasattr(shape, "image"): # make sure the Picture shape has an image attribute + image_part = shape.image + if image_part.ext not in ["emf", "wmf"]: # all extensions except emf and wmf that lead to bug in adding picture to doc + self.handle_pictures(shape, parent_slide, slide_ind, doc, slide_size) # If shape doesn't have any text, move on to the next shape if not hasattr(shape, "text"): return