mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Apply suggestion
Signed-off-by: ka-weihe <k@weihe.dk>
This commit is contained in:
parent
baa6d87ed1
commit
61a5d95b20
@@ -541,25 +541,19 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def handle_anchor(self, element: Tag, doc: DoclingDocument) -> None:
|
def handle_anchor(self, element: Tag, doc: DoclingDocument) -> None:
|
||||||
"""Handles anchor tags (<a>) by extracting the visible text and href attribute."""
|
"""Handles anchor tags (<a>) by extracting the visible text and setting the hyperlink property."""
|
||||||
# Extract the anchor text and the URL (href attribute)
|
# Extract the visible text and href URL
|
||||||
text = element.get_text().strip()
|
text = element.get_text().strip()
|
||||||
href = element.get("href", "").strip()
|
href = element.get("href", "").strip()
|
||||||
|
|
||||||
if text:
|
# If no text is present, use the hyperlink itself as the text.
|
||||||
# Combine the text with the hyperlink if available
|
display_text = text if text else href
|
||||||
combined_text = f"{text} (Link: {href})" if href else text
|
|
||||||
|
if display_text or href:
|
||||||
doc.add_text(
|
doc.add_text(
|
||||||
parent=self.parents[self.level],
|
parent=self.parents[self.level],
|
||||||
label=DocItemLabel.TEXT,
|
label=DocItemLabel.TEXT,
|
||||||
text=combined_text,
|
text=display_text,
|
||||||
content_layer=self.content_layer,
|
hyperlink=href if href else None, # Pass the hyperlink as a separate parameter
|
||||||
)
|
|
||||||
elif href:
|
|
||||||
# If no visible text, add the link itself
|
|
||||||
doc.add_text(
|
|
||||||
parent=self.parents[self.level],
|
|
||||||
label=DocItemLabel.TEXT,
|
|
||||||
text=f"Link: {href}",
|
|
||||||
content_layer=self.content_layer,
|
content_layer=self.content_layer,
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user