From 98ca58ffd0f5d8d94c99d67d296476648f37054b Mon Sep 17 00:00:00 2001 From: Maxim Lysak Date: Mon, 14 Oct 2024 16:48:55 +0200 Subject: [PATCH] added support for enumerated lists Signed-off-by: Maxim Lysak --- docling/backend/mspowerpoint_backend.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index aeab0573..e88ac5dd 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -109,7 +109,9 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB def handle_text_elements(self, shape, parent_slide, slide_ind, doc): is_a_list = False + enum_list_item_value = 0 for paragraph in shape.text_frame.paragraphs: + enum_list_item_value += 1 bullet_type = "None" # Check if paragraph is a bullet point using the `element` XML p = paragraph._element @@ -157,7 +159,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB for e in p.iterfind(".//a:r", namespaces={"a": self.namespaces["a"]}): if len(e.text.strip()) > 0: e_is_a_list_item = False - + is_numbered = False if ( p.find(".//a:buChar", namespaces={"a": self.namespaces["a"]}) is not None @@ -169,13 +171,17 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB is not None ): bullet_type = "Numbered" + is_numbered = True e_is_a_list_item = True else: e_is_a_list_item = False if e_is_a_list_item: # TODO: Set marker and enumerated arguments if this is an enumeration element. + enum_marker = str(enum_list_item_value) + "." doc.add_list_item( + marker=enum_marker, + enumerated=is_numbered, parent=new_list, text=list_text, prov=prov, @@ -195,6 +201,8 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB elif placeholder_type == PP_PLACEHOLDER.SUBTITLE: DocItemLabel.SECTION_HEADER + enum_list_item_value = 1 + doc.add_text( label=doc_label, parent=parent_slide,