mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Identify headers through inherited style
Signed-off-by: Rafael Teixeira de Lima <Rafael.td.lima@gmail.com>
This commit is contained in:
parent
32b03b65f4
commit
4bea04dc75
@ -264,6 +264,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
label = paragraph.style.style_id
|
label = paragraph.style.style_id
|
||||||
name = paragraph.style.name
|
name = paragraph.style.name
|
||||||
|
base_style_label = None
|
||||||
|
base_style_name = None
|
||||||
|
if base_style := getattr(paragraph.style, "base_style", None):
|
||||||
|
base_style_label = base_style.style_id
|
||||||
|
base_style_name = base_style.name
|
||||||
|
|
||||||
if label is None:
|
if label is None:
|
||||||
return "Normal", None
|
return "Normal", None
|
||||||
@ -277,6 +282,10 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
return self._get_heading_and_level(label)
|
return self._get_heading_and_level(label)
|
||||||
if "heading" in name.lower():
|
if "heading" in name.lower():
|
||||||
return self._get_heading_and_level(name)
|
return self._get_heading_and_level(name)
|
||||||
|
if base_style_label and "heading" in base_style_label.lower():
|
||||||
|
return self._get_heading_and_level(base_style_label)
|
||||||
|
if base_style_name and "heading" in base_style_name.lower():
|
||||||
|
return self._get_heading_and_level(base_style_name)
|
||||||
|
|
||||||
return label, None
|
return label, None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user