Made header processing smarter: headers with arbitrary inline styling (bold, italic, emphasis) are now handled

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak 2024-10-25 14:08:47 +02:00
parent 162643c7f7
commit a5735f4fd4

View File

@@ -136,18 +136,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
else: else:
doc_label = DocItemLabel.SECTION_HEADER doc_label = DocItemLabel.SECTION_HEADER
if isinstance(element.children[0].children, str): # Header could have arbitrary inclusion of bold, italic or emphasis,
# Straight text in the header # hence we need to traverse the tree to get full text of a header
snippet_text = element.children[0].children.strip() strings = []
elif isinstance(element.children[0].children[0].children, str):
# Bold or italic text in the header
snippet_text = element.children[0].children[0].children.strip()
elif isinstance(element.children[0].children[0].children[0].children, str):
# Emphasized text in the header
snippet_text = (
element.children[0].children[0].children[0].children.strip()
)
# Define a recursive function to traverse the tree
def traverse(node):
    """Collect every text fragment found beneath ``node`` into ``strings``.

    The subtree is walked depth-first in child-list order, so text nested
    inside wrapper nodes is appended in the order the children appear.
    """
    # Nodes without a "children" attribute yield None here and match
    # neither branch below, so they are skipped.
    content = getattr(node, "children", None)
    if isinstance(content, str):
        # Leaf: "children" holds the text itself
        strings.append(content)
    elif isinstance(content, list):
        # Container: descend into each child in order
        for item in content:
            traverse(item)
traverse(element)
snippet_text = "".join(strings)
if len(snippet_text) > 0: if len(snippet_text) > 0:
parent_element = doc.add_text( parent_element = doc.add_text(
label=doc_label, parent=parent_element, text=snippet_text label=doc_label, parent=parent_element, text=snippet_text