Added picture data for pptx pictures

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak 2024-11-15 14:35:25 +01:00
parent 364d37ca96
commit a54f583415

View File

@ -10,11 +10,13 @@ from docling_core.types.doc import (
DoclingDocument,
DocumentOrigin,
GroupLabel,
ImageRef,
ProvenanceItem,
Size,
TableCell,
TableData,
)
from PIL import Image
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
@ -268,9 +270,20 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
return
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
# Get the image bytes
image = shape.image
image_bytes = image.blob
# Open it with PIL
pil_image = Image.open(BytesIO(image_bytes))
# shape has picture
prov = self.generate_prov(shape, slide_ind, "")
doc.add_picture(parent=parent_slide, caption=None, prov=prov)
doc.add_picture(
parent=parent_slide,
image=ImageRef.from_pil(image=pil_image, dpi=72),
caption=None,
prov=prov,
)
return
def handle_tables(self, shape, parent_slide, slide_ind, doc):