From fbc4c4d103d0aa8fb7ccc355eca97a9f12f99e80 Mon Sep 17 00:00:00 2001 From: Benichou Date: Sun, 30 Mar 2025 15:50:24 -0400 Subject: [PATCH 1/3] bug fix to ensure handling of pictures only applies to picture with an image attribute and image part of all extensions except with emf or wmf extensions to avoid bug in adding picture to doc --- docling/backend/mspowerpoint_backend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index a752e8dc..e569d2c2 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -392,9 +392,10 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB self.handle_tables(shape, parent_slide, slide_ind, doc, slide_size) if shape.shape_type == MSO_SHAPE_TYPE.PICTURE: # Handle Pictures - self.handle_pictures( - shape, parent_slide, slide_ind, doc, slide_size - ) + if hasattr(shape, "image"): # make sure the Picture shape has an image attribute + image_part = shape.image + if image_part.ext not in ["emf", "wmf"]: # all extensions except emf and wmf that lead to bug in adding picture to doc + self.handle_pictures(shape, parent_slide, slide_ind, doc, slide_size) # If shape doesn't have any text, move on to the next shape if not hasattr(shape, "text"): return From 68be6e1873f4720b4294a4878fff88834dab4d3e Mon Sep 17 00:00:00 2001 From: Benichou Date: Sun, 30 Mar 2025 16:02:01 -0400 Subject: [PATCH 2/3] fix: ensure handling of pictures only applies to picture with an image attribute and image part of all extensions except with emf or wmf extensions to avoid bug in adding picture to doc --- docling/backend/mspowerpoint_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index e569d2c2..3523f556 100644 --- a/docling/backend/mspowerpoint_backend.py +++ 
b/docling/backend/mspowerpoint_backend.py @@ -393,7 +393,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB if shape.shape_type == MSO_SHAPE_TYPE.PICTURE: # Handle Pictures if hasattr(shape, "image"): # make sure the Picture shape has an image attribute - image_part = shape.image + image_part = shape.image # get the image part if image_part.ext not in ["emf", "wmf"]: # all extensions except emf and wmf that lead to bug in adding picture to doc self.handle_pictures(shape, parent_slide, slide_ind, doc, slide_size) # If shape doesn't have any text, move on to the next shape From 5e12d0795adf61274041dd224e0b11effb75b8fc Mon Sep 17 00:00:00 2001 From: Benichou Date: Sun, 30 Mar 2025 16:08:19 -0400 Subject: [PATCH 3/3] fix: ensure handling of pictures only applies to picture with an image attribute and image part of all extensions except with emf or wmf extensions to avoid bug in adding picture to doc (just added my signoff) Signed-off-by: Franck Benichou franck.benichou@sciencespo.fr --- docling/backend/mspowerpoint_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 3523f556..d5ffee02 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -394,7 +394,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB # Handle Pictures if hasattr(shape, "image"): # make sure the Picture shape has an image attribute image_part = shape.image # get the image part - if image_part.ext not in ["emf", "wmf"]: # all extensions except emf and wmf that lead to bug in adding picture to doc + if image_part.ext not in ["emf", "wmf"]: # all extensions except emf and wmf self.handle_pictures(shape, parent_slide, slide_ind, doc, slide_size) # If shape doesn't have any text, move on to the next shape if not hasattr(shape, "text"):