mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-28 13:04:25 +00:00
Handle hyperlink
Signed-off-by: SimJeg <sjegou@nvidia.com>
This commit is contained in:
parent
23fa9b9902
commit
d3362d1553
@ -303,12 +303,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
paragraph_text = ""
|
paragraph_text = ""
|
||||||
group_text = ""
|
group_text = ""
|
||||||
previous_format = None
|
previous_format = None
|
||||||
|
hyperlink = None
|
||||||
|
|
||||||
# Iterate over the runs of the paragraph and group them by format
|
# Iterate over the runs of the paragraph and group them by format
|
||||||
for c in paragraph.iter_inner_content():
|
for c in paragraph.iter_inner_content():
|
||||||
if isinstance(c, Hyperlink):
|
if isinstance(c, Hyperlink):
|
||||||
text = f"[{c.text}]({c.address})"
|
text = f"[{c.text}]({c.address})"
|
||||||
format = self.get_format_from_run(c.runs[0])
|
format = self.get_format_from_run(c.runs[0])
|
||||||
|
hyperlink = c.address
|
||||||
elif isinstance(c, Run):
|
elif isinstance(c, Run):
|
||||||
text = c.text
|
text = c.text
|
||||||
format = self.get_format_from_run(c)
|
format = self.get_format_from_run(c)
|
||||||
@ -319,11 +321,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
previous_format = previous_format or format
|
previous_format = previous_format or format
|
||||||
|
|
||||||
# If the style changes for a non empty text, format the group and reset it
|
# If the style changes for a non empty text, format the group and reset it
|
||||||
if len(text.strip()) and (format != previous_format):
|
if (len(text.strip()) and (format != previous_format)) or (hyperlink is not None):
|
||||||
previous_text = self.format_text(group_text, previous_format)
|
paragraph_text += self.format_text(group_text, previous_format)
|
||||||
paragraph_text += previous_text
|
|
||||||
previous_format = format
|
previous_format = format
|
||||||
group_text = ""
|
group_text = ""
|
||||||
|
hyperlink = None
|
||||||
|
|
||||||
group_text += text
|
group_text += text
|
||||||
|
|
||||||
@ -331,6 +333,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
if len(group_text.strip()) > 0:
|
if len(group_text.strip()) > 0:
|
||||||
paragraph_text += self.format_text(group_text, format)
|
paragraph_text += self.format_text(group_text, format)
|
||||||
|
|
||||||
|
# TODO: return a list of (text, format, hyperlink) tuples instead of a single string
|
||||||
return paragraph_text.strip()
|
return paragraph_text.strip()
|
||||||
|
|
||||||
def handle_equations_in_text(self, element, text):
|
def handle_equations_in_text(self, element, text):
|
||||||
|
Loading…
Reference in New Issue
Block a user