Apply suggestion

Signed-off-by: ka-weihe <k@weihe.dk>
This commit is contained in:
ka-weihe 2025-04-16 12:59:26 +02:00 committed by GitHub
parent baa6d87ed1
commit 61a5d95b20
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -541,25 +541,19 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         )
 
     def handle_anchor(self, element: Tag, doc: DoclingDocument) -> None:
-        """Handles anchor tags (<a>) by extracting the visible text and href attribute."""
+        """Handles anchor tags (<a>) by extracting the visible text and setting the hyperlink property."""
-        # Extract the anchor text and the URL (href attribute)
+        # Extract the visible text and href URL
         text = element.get_text().strip()
         href = element.get("href", "").strip()
-        if text:
-            # Combine the text with the hyperlink if available
-            combined_text = f"{text} (Link: {href})" if href else text
+        # If no text is present, use the hyperlink itself as the text.
+        display_text = text if text else href
+
+        if display_text or href:
             doc.add_text(
                 parent=self.parents[self.level],
                 label=DocItemLabel.TEXT,
-                text=combined_text,
-                content_layer=self.content_layer,
-            )
-        elif href:
-            # If no visible text, add the link itself
-            doc.add_text(
-                parent=self.parents[self.level],
-                label=DocItemLabel.TEXT,
-                text=f"Link: {href}",
+                text=display_text,
+                hyperlink=href if href else None,  # Pass the hyperlink as a separate parameter
                 content_layer=self.content_layer,
             )
 