mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-29 21:44:32 +00:00
Fixed issue with group ordering in pptx backend, added debug log into run with formats
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
82126e3871
commit
76d904164e
@ -112,6 +112,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
||||
|
||||
def handle_text_elements(self, shape, parent_slide, slide_ind, doc):
|
||||
is_a_list = False
|
||||
is_list_group_created = False
|
||||
enum_list_item_value = 0
|
||||
new_list = None
|
||||
bullet_type = "None"
|
||||
@ -153,6 +154,12 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
||||
else:
|
||||
_log.debug("No List")
|
||||
|
||||
# If there is a list inside of the shape, create a new docling list to assign list items to
|
||||
# if is_a_list:
|
||||
# new_list = doc.add_group(
|
||||
# label=list_label, name=f"list", parent=parent_slide
|
||||
# )
|
||||
|
||||
# Iterate through paragraphs to build up text
|
||||
for paragraph in shape.text_frame.paragraphs:
|
||||
# p_text = paragraph.text.strip()
|
||||
@ -219,16 +226,15 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
||||
prov=prov,
|
||||
)
|
||||
|
||||
# If there is a list inside of the shape, create a new docling list to assign list items to
|
||||
if is_a_list:
|
||||
new_list = doc.add_group(
|
||||
label=list_label, name=f"list", parent=parent_slide
|
||||
)
|
||||
|
||||
if len(inline_list_item_text) > 0:
|
||||
enum_marker = ""
|
||||
if is_numbered:
|
||||
enum_marker = str(enum_list_item_value) + "."
|
||||
if not is_list_group_created:
|
||||
new_list = doc.add_group(
|
||||
label=list_label, name=f"list", parent=parent_slide
|
||||
)
|
||||
is_list_group_created = True
|
||||
doc.add_list_item(
|
||||
marker=enum_marker,
|
||||
enumerated=is_numbered,
|
||||
@ -328,7 +334,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
||||
if len(tcells) > 0:
|
||||
# If table is not fully empty...
|
||||
# Create Docling table
|
||||
doc.add_table(data=data, prov=prov)
|
||||
doc.add_table(parent=parent_slide, data=data, prov=prov)
|
||||
return
|
||||
|
||||
def walk_linear(self, pptx_obj, doc) -> DoclingDocument:
|
||||
|
@ -65,7 +65,7 @@ def main():
|
||||
f"Document {res.input.file.name} converted."
|
||||
f"\nSaved markdown output to: {str(out_path)}"
|
||||
)
|
||||
# print(res.docdocument.export_to_markdown())
|
||||
_log.debug(res.document._export_to_indented_text(max_text_len=16))
|
||||
# Export Docling document format to markdowndoc:
|
||||
with (out_path / f"{res.input.file.stem}.md").open("w") as fp:
|
||||
fp.write(res.document.export_to_markdown())
|
||||
|
Loading…
Reference in New Issue
Block a user