From 5501dc57254c03df00630beac6b4400fcce197fe Mon Sep 17 00:00:00 2001 From: Artus Krohn-Grimberghe Date: Wed, 18 Jun 2025 15:49:25 +0200 Subject: [PATCH] Fix: inconsistencies between file format backends In msword_backend.py, the _add_header method has logic that seems to artificially limit the heading depth: def _add_header(self, doc: DoclingDocument, curr_level: Optional[int], text: str, is_numbered_style: bool = False) -> None: # ... if isinstance(curr_level, int): # ... else: current_level = self.level parent_level = self.level - 1 add_level = 1 # <-- This is the problem! When curr_level is None (which happens when the heading style doesn't have a clear level number), it defaults to add_level = 1, effectively flattening deeper headings. The correct handling would instead be to also subtract 1, with a minimum of 1: else: current_level = self.level parent_level = self.level - 1 add_level = max(1, self.level - 1) # Also subtract 1, with a minimum of 1 NOTE(review): the hunk below applies max(0, self.level - 1), i.e. a minimum of 0 rather than the minimum of 1 described above; confirm which lower bound is intended. Signed-off-by: Artus Krohn-Grimberghe --- docling/backend/msword_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index ad33885e..64cf1f03 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -878,7 +878,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): else: current_level = self.level parent_level = self.level - 1 - add_level = 1 + add_level = max(0, self.level - 1) if is_numbered_style: if add_level in self.numbered_headers: