mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-25 19:44:34 +00:00
Fix: inconsistencies between file format backends
In msword_backend.py, the _add_header method has logic that seems to artificially limit the heading depth:

```python
def _add_header(self, doc: DoclingDocument, curr_level: Optional[int], text: str, is_numbered_style: bool = False) -> None:
    # ...
    if isinstance(curr_level, int):
        # ...
    else:
        current_level = self.level
        parent_level = self.level - 1
        add_level = 1  # <-- This is the problem!
```

When curr_level is None (which happens when the heading style doesn't have a clear level number), it defaults to add_level = 1, effectively flattening deeper headings. The correct handling, instead, would be to also subtract 1, with a minimum of 1:

```python
    else:
        current_level = self.level
        parent_level = self.level - 1
        add_level = max(1, self.level - 1)  # Also subtract 1, with minimum of 1
```

Signed-off-by: Artus Krohn-Grimberghe <artuskg@users.noreply.github.com>
This commit is contained in:
parent
42af299fa2
commit
5501dc5725
@@ -878,7 +878,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
         else:
             current_level = self.level
             parent_level = self.level - 1
-            add_level = 1
+            add_level = max(0, self.level - 1)
 
         if is_numbered_style:
             if add_level in self.numbered_headers:
Loading…
Reference in New Issue
Block a user