def handle_pictures(self, shape, parent_slide, slide_ind, doc):
    """Add the picture carried by ``shape`` to ``doc`` under ``parent_slide``.

    The embedded image bytes are decoded with Pillow and attached to the
    picture item as an ``ImageRef``. If the shape has no embedded image, or
    Pillow cannot decode it, the picture is still added (with its
    provenance) but without image data, so one bad picture does not abort
    the conversion of the whole presentation.

    Args:
        shape: Picture shape; its ``image`` attribute is read for the
            ``blob`` bytes and the ``dpi`` pair.
        parent_slide: Document node the picture item is attached to.
        slide_ind: Slide index, forwarded to ``self.generate_prov``.
        doc: Document object whose ``add_picture`` is called.

    Returns:
        None.
    """
    # Function-scope imports keep this method self-contained.
    import logging
    from io import BytesIO

    from docling_core.types.doc import ImageRef
    from PIL import Image

    image_ref = None
    try:
        # python-pptx raises ValueError here when the picture is linked
        # rather than embedded.
        embedded = shape.image
        # Use the resolution recorded for the image instead of a fixed 72;
        # python-pptx documents 72 as its own fallback when none is stored.
        horz_dpi, _ = embedded.dpi
        pil_image = Image.open(BytesIO(embedded.blob))
        # Conversion happens inside the try because decoding may be deferred
        # past Image.open -- NOTE(review): confirm against the Pillow docs.
        image_ref = ImageRef.from_pil(image=pil_image, dpi=horz_dpi)
    except (OSError, ValueError) as exc:
        # UnidentifiedImageError is a subclass of OSError.
        logging.getLogger(__name__).warning(
            "Picture on slide %s could not be loaded, adding it without "
            "image data: %s",
            slide_ind,
            exc,
        )

    # shape has picture
    prov = self.generate_prov(shape, slide_ind, "")
    doc.add_picture(
        parent=parent_slide,
        image=image_ref,
        caption=None,
        prov=prov,
    )
    return