mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
Added proper handling of headers with bold, italic or emphasis
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
1c933e20f8
commit
97999ebb43
@ -135,11 +135,24 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
doc_label = DocItemLabel.TITLE
|
doc_label = DocItemLabel.TITLE
|
||||||
else:
|
else:
|
||||||
doc_label = DocItemLabel.SECTION_HEADER
|
doc_label = DocItemLabel.SECTION_HEADER
|
||||||
snippet_text = element.children[0].children.strip()
|
|
||||||
|
|
||||||
parent_element = doc.add_text(
|
if isinstance(element.children[0].children, str):
|
||||||
label=doc_label, parent=parent_element, text=snippet_text
|
# Straight text in the header
|
||||||
)
|
snippet_text = element.children[0].children.strip()
|
||||||
|
elif isinstance(element.children[0].children[0].children, str):
|
||||||
|
# Bold or italic text in the header
|
||||||
|
snippet_text = element.children[0].children[0].children.strip()
|
||||||
|
elif isinstance(element.children[0].children[0].children[0].children, str):
|
||||||
|
# Emphasized text in the header
|
||||||
|
snippet_text = (
|
||||||
|
element.children[0].children[0].children[0].children.strip()
|
||||||
|
)
|
||||||
|
print("snippet_text: {}".format(snippet_text))
|
||||||
|
|
||||||
|
if len(snippet_text) > 0:
|
||||||
|
parent_element = doc.add_text(
|
||||||
|
label=doc_label, parent=parent_element, text=snippet_text
|
||||||
|
)
|
||||||
|
|
||||||
elif isinstance(element, marko.block.List):
|
elif isinstance(element, marko.block.List):
|
||||||
self.close_table(doc)
|
self.close_table(doc)
|
||||||
|
Loading…
Reference in New Issue
Block a user