fix: improve HTML layer detection, various MD fixes (#1241)
Some checks failed
Run Docs CD / build-deploy-docs (push) Failing after 1m31s
Run Docs CI / build-docs (push) Failing after 54s

Markdown fixes:
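- properly propagate section header levels (section headers are now added via `add_heading` with the level taken from the Markdown heading element, instead of as plain text items)
- improve handling of list subroots without text (a list item is processed as a text item only when its first child is a paragraph)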

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas
2025-03-26 16:07:14 +01:00
committed by GitHub
parent 85c4df887b
commit 9210812bfa
8 changed files with 560 additions and 465 deletions

View File

@@ -206,9 +206,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
hlevel = int(element.name.replace("h", ""))
text = element.text.strip()
if hlevel == 1:
self.content_layer = ContentLayer.BODY
self.content_layer = ContentLayer.BODY
if hlevel == 1:
for key in self.parents.keys():
self.parents[key] = None

View File

@@ -212,9 +212,16 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
traverse(element)
snippet_text = "".join(strings)
if len(snippet_text) > 0:
parent_item = doc.add_text(
label=doc_label, parent=parent_item, text=snippet_text
)
if doc_label == DocItemLabel.SECTION_HEADER:
parent_item = doc.add_heading(
text=snippet_text,
level=element.level - 1,
parent=parent_item,
)
else:
parent_item = doc.add_text(
label=doc_label, parent=parent_item, text=snippet_text
)
elif isinstance(element, marko.block.List):
has_non_empty_list_items = False
@@ -232,12 +239,15 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
label=label, name=f"list", parent=parent_item
)
elif isinstance(element, marko.block.ListItem) and len(element.children) > 0:
elif (
isinstance(element, marko.block.ListItem)
and len(element.children) > 0
and isinstance((first_child := element.children[0]), marko.block.Paragraph)
):
self._close_table(doc)
self._process_inline_text(parent_item, doc)
_log.debug(" - List item")
first_child = element.children[0]
snippet_text = str(first_child.children[0].children) # type: ignore
is_numbered = False
if (