mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
Added picture data for pptx pictures
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
364d37ca96
commit
a54f583415
@ -10,11 +10,13 @@ from docling_core.types.doc import (
|
|||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
DocumentOrigin,
|
DocumentOrigin,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
|
ImageRef,
|
||||||
ProvenanceItem,
|
ProvenanceItem,
|
||||||
Size,
|
Size,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
|
from PIL import Image
|
||||||
from pptx import Presentation
|
from pptx import Presentation
|
||||||
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
||||||
|
|
||||||
@ -268,9 +270,20 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
return
|
return
|
||||||
|
|
||||||
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
||||||
|
# Get the image bytes
|
||||||
|
image = shape.image
|
||||||
|
image_bytes = image.blob
|
||||||
|
# Open it with PIL
|
||||||
|
pil_image = Image.open(BytesIO(image_bytes))
|
||||||
|
|
||||||
# shape has picture
|
# shape has picture
|
||||||
prov = self.generate_prov(shape, slide_ind, "")
|
prov = self.generate_prov(shape, slide_ind, "")
|
||||||
doc.add_picture(parent=parent_slide, caption=None, prov=prov)
|
doc.add_picture(
|
||||||
|
parent=parent_slide,
|
||||||
|
image=ImageRef.from_pil(image=pil_image, dpi=72),
|
||||||
|
caption=None,
|
||||||
|
prov=prov,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
||||||
|
Loading…
Reference in New Issue
Block a user