mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: Add PPTX notes slides (#474)
* feat: Add PPTX notes slides

  Presenter notes may have useful information and should also be extracted.

  Signed-off-by: Maciej Wieczorek <maciej@wieczorek.co>

* feat: Move presenter notes into furniture

  Signed-off-by: Maciej Wieczorek <maciej@wieczorek.co>

---------

Signed-off-by: Maciej Wieczorek <maciej@wieczorek.co>
This commit is contained in:
@@ -16,6 +16,7 @@ from docling_core.types.doc import (
|
||||
TableCell,
|
||||
TableData,
|
||||
)
|
||||
from docling_core.types.doc.document import ContentLayer
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
from pptx import Presentation
|
||||
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
||||
@@ -421,4 +422,21 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
||||
for shape in slide.shapes:
|
||||
handle_shapes(shape, parent_slide, slide_ind, doc, slide_size)
|
||||
|
||||
# Handle notes slide
|
||||
if slide.has_notes_slide:
|
||||
notes_slide = slide.notes_slide
|
||||
notes_text = notes_slide.notes_text_frame.text.strip()
|
||||
if notes_text:
|
||||
bbox = BoundingBox(l=0, t=0, r=0, b=0)
|
||||
prov = ProvenanceItem(
|
||||
page_no=slide_ind + 1, charspan=[0, len(notes_text)], bbox=bbox
|
||||
)
|
||||
doc.add_text(
|
||||
label=DocItemLabel.TEXT,
|
||||
parent=parent_slide,
|
||||
text=notes_text,
|
||||
prov=prov,
|
||||
content_layer=ContentLayer.FURNITURE,
|
||||
)
|
||||
|
||||
return doc
|
||||
|
||||
Reference in New Issue
Block a user