mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-29 13:34:21 +00:00
Add standalone equations as DocItem formula
Signed-off-by: Rafael Teixeira de Lima <Rafael.td.lima@gmail.com>
This commit is contained in:
parent
f5034944b8
commit
c7289f647a
@ -242,8 +242,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def handle_text_elements(self, element, docx_obj, doc):
|
def handle_text_elements(self, element, docx_obj, doc):
|
||||||
paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
|
paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
|
||||||
|
|
||||||
text = paragraph.text
|
raw_text = paragraph.text
|
||||||
text = self.handle_equations_in_text(element=element, text=text)
|
text = self.handle_equations_in_text(element=element, text=raw_text)
|
||||||
|
|
||||||
if text is None:
|
if text is None:
|
||||||
return
|
return
|
||||||
@ -281,21 +281,20 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.level = self.level_at_new_list - 1
|
self.level = self.level_at_new_list - 1
|
||||||
self.level_at_new_list = None
|
self.level_at_new_list = None
|
||||||
|
|
||||||
if p_style_id in ["Title"]:
|
if p_style_id in ["Title"]:
|
||||||
for key, val in self.parents.items():
|
for key, val in self.parents.items():
|
||||||
self.parents[key] = None
|
self.parents[key] = None
|
||||||
self.parents[0] = doc.add_text(
|
self.parents[0] = doc.add_text(
|
||||||
parent=None, label=DocItemLabel.TITLE, text=text
|
parent=None, label=DocItemLabel.TITLE, text=text
|
||||||
)
|
)
|
||||||
|
|
||||||
elif "Heading" in p_style_id:
|
elif "Heading" in p_style_id:
|
||||||
self.add_header(element, docx_obj, doc, p_style_id, p_level, text)
|
self.add_header(element, docx_obj, doc, p_style_id, p_level, text)
|
||||||
|
|
||||||
elif p_style_id in [
|
elif p_style_id in [
|
||||||
"Paragraph",
|
|
||||||
"Normal",
|
|
||||||
"Subtitle",
|
"Subtitle",
|
||||||
"Author",
|
"Author",
|
||||||
"DefaultText",
|
|
||||||
"ListParagraph",
|
"ListParagraph",
|
||||||
"ListBullet",
|
"ListBullet",
|
||||||
"Quote",
|
"Quote",
|
||||||
@ -305,12 +304,32 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
label=DocItemLabel.PARAGRAPH, parent=self.parents[level - 1], text=text
|
label=DocItemLabel.PARAGRAPH, parent=self.parents[level - 1], text=text
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elif (raw_text is None or len(raw_text) == 0) and len(text) > 0:
|
||||||
|
# Standalone equation
|
||||||
|
# Entities in which all text comes from equations
|
||||||
|
level = self.get_level()
|
||||||
|
if text.strip().startswith("$") and text.strip().endswith("$"):
|
||||||
|
text = text.strip()[1:-1]
|
||||||
|
doc.add_text(
|
||||||
|
label=DocItemLabel.FORMULA, parent=self.parents[level - 1], text=text
|
||||||
|
)
|
||||||
|
|
||||||
|
elif p_style_id in [
|
||||||
|
"Paragraph",
|
||||||
|
"Normal",
|
||||||
|
"DefaultText",
|
||||||
|
]:
|
||||||
|
level = self.get_level()
|
||||||
|
doc.add_text(
|
||||||
|
label=DocItemLabel.PARAGRAPH, parent=self.parents[level - 1], text=text
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Text style names can, and will have, not only default values but user values too
|
# Text style names can, and will have, not only default values but user values too
|
||||||
# hence we treat all other labels as pure text
|
# hence we treat all other labels as pure text
|
||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
doc.add_text(
|
doc.add_text(
|
||||||
label=DocItemLabel.PARAGRAPH, parent=self.parents[level - 1], text=text
|
label=DocItemLabel.TEXT, parent=self.parents[level - 1], text=text
|
||||||
)
|
)
|
||||||
self.update_history(p_style_id, p_level, numid, ilevel)
|
self.update_history(p_style_id, p_level, numid, ilevel)
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user