mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-10 13:48:13 +00:00
fix: Fixed docx import with headers that are also lists (#842)
* Fix for docx import when headers are also list items: they are now recorded as the appropriate headers and subheaders. Unit test included.

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Update docling/backend/msword_backend.py

  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Maxim Lysak <101627549+maxmnemonic@users.noreply.github.com>

* Update docling/backend/msword_backend.py

  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Maxim Lysak <101627549+maxmnemonic@users.noreply.github.com>

---------

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Signed-off-by: Maxim Lysak <101627549+maxmnemonic@users.noreply.github.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -240,7 +240,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
numid = None
|
||||
|
||||
# Handle lists
|
||||
if numid is not None and ilevel is not None:
|
||||
if (
|
||||
numid is not None
|
||||
and ilevel is not None
|
||||
and p_style_id not in ["Title", "Heading"]
|
||||
):
|
||||
self.add_listitem(
|
||||
element,
|
||||
docx_obj,
|
||||
@@ -254,12 +258,22 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
)
|
||||
self.update_history(p_style_id, p_level, numid, ilevel)
|
||||
return
|
||||
elif numid is None and self.prev_numid() is not None: # Close list
|
||||
for key, val in self.parents.items():
|
||||
if key >= self.level_at_new_list:
|
||||
elif (
|
||||
numid is None
|
||||
and self.prev_numid() is not None
|
||||
and p_style_id not in ["Title", "Heading"]
|
||||
): # Close list
|
||||
if self.level_at_new_list:
|
||||
for key in range(len(self.parents)):
|
||||
if key >= self.level_at_new_list:
|
||||
self.parents[key] = None
|
||||
self.level = self.level_at_new_list - 1
|
||||
self.level_at_new_list = None
|
||||
else:
|
||||
for key in range(len(self.parents)):
|
||||
self.parents[key] = None
|
||||
self.level = self.level_at_new_list - 1
|
||||
self.level_at_new_list = None
|
||||
self.level = 0
|
||||
|
||||
if p_style_id in ["Title"]:
|
||||
for key, val in self.parents.items():
|
||||
self.parents[key] = None
|
||||
|
||||
Reference in New Issue
Block a user