mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat(html): Support in-line anchor tags in HTML texts (#1659)
* re-implement links for html backend.
Signed-off-by: Roman Kayan BAZG <roman.kayan@bazg.admin.ch>
* fix inline groups in list items. write specific test for find_parent_annotation of _extract_text_and_hyperlink_recursively.
Signed-off-by: Roman Kayan BAZG <roman.kayan@bazg.admin.ch>
* implement hack for images.
Signed-off-by: Roman Kayan BAZG <roman.kayan@bazg.admin.ch>
---------
Signed-off-by: Roman Kayan BAZG <roman.kayan@bazg.admin.ch>
This commit is contained in:
17
tests/data/html/hyperlink_01.html
vendored
Normal file
17
tests/data/html/hyperlink_01.html
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
<html>
|
||||
|
||||
<body>
|
||||
<h1>Something</h1>
|
||||
<p>
|
||||
Please follow the link to:
|
||||
<a href="#">
|
||||
<span class="icon icon--right"></span> This page
|
||||
</a>
|
||||
.
|
||||
</p>
|
||||
<div class="mod mod-contentpage">
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
18
tests/data/html/hyperlink_02.html
vendored
Normal file
18
tests/data/html/hyperlink_02.html
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
<html>
|
||||
|
||||
<body>
|
||||
<div class="nav-mobile-header">
|
||||
<div class="table-row">
|
||||
<span class="nav-mobile-logo">
|
||||
<img src="/etc/designs/core/frontend/guidelines/img/xyz.svg"
|
||||
onerror="this.onerror=null; this.src='/etc/designs/core/frontend/guidelines/img/xyz.png'"
|
||||
alt="Image alt text" />
|
||||
</span>
|
||||
<h2>
|
||||
<a href="/home.html" title="My home page " aria-label="My home page ">Home</a>
|
||||
</h2>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
40
tests/data/html/hyperlink_03.html
vendored
Normal file
40
tests/data/html/hyperlink_03.html
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
<html>
|
||||
|
||||
<body>
|
||||
<ul class="nav navbar-nav">
|
||||
<li class="dropdown">
|
||||
<a id="main-dropdown" href="#" aria-label="My Section" class="dropdown-toggle" data-toggle="dropdown"><span
|
||||
class="icon icon--right"></span> My Section</a>
|
||||
<ul class="dropdown-menu" role="menu">
|
||||
<li class="dropdown-header">
|
||||
<a href="/start.html" aria-label="Some page" target="_blank" title="">Some
|
||||
page</a>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/home2.html" aria-label="Some other page" target="_blank" title=""> A sub page</a>
|
||||
</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li>This is my <a href="/home.html">Homepage</a>
|
||||
<ul>
|
||||
<li>
|
||||
List item inner
|
||||
</li>
|
||||
<li>
|
||||
More text <a href="/some_links.html">with some links</a> and more text.
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a href="#main-navigation">Main navigation</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="dropdown">
|
||||
<a id="other-dropdown" href="#" aria-label="My Org" class="dropdown-toggle"><span
|
||||
class="icon icon--right"></span> My organisation</a>
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
11
tests/data/html/hyperlink_04.html
vendored
Normal file
11
tests/data/html/hyperlink_04.html
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
<html>
|
||||
|
||||
<body>
|
||||
<a href="/start.html">
|
||||
<div>
|
||||
This is some text.
|
||||
</div>
|
||||
</a>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
36
tests/data/html/hyperlink_05.html
vendored
Normal file
36
tests/data/html/hyperlink_05.html
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Image Hyperlink and Caption Example</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; margin: 2em; }
|
||||
figure { max-width: 320px; margin: 2em 0; }
|
||||
figcaption { background: #f3f3f3; color: #333; padding: 6px; text-align: center; font-style: italic; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<!-- Image as a hyperlink -->
|
||||
<a href="https://www.example.com" target="_blank">
|
||||
<img src="https://via.placeholder.com/200x100.png?text=Clickable+Image" alt="Clickable Example" />
|
||||
</a>
|
||||
|
||||
<!-- Image with a caption -->
|
||||
<figure>
|
||||
<img src="https://via.placeholder.com/250x150.png?text=Captioned+Image" alt="Image with Caption" />
|
||||
<figcaption>This is an example caption for the image.</figcaption>
|
||||
</figure>
|
||||
|
||||
|
||||
<!-- Image with a caption -->
|
||||
<figure>
|
||||
<a href="https://www.example.com" target="_blank">
|
||||
<img src="https://via.placeholder.com/250x150.png?text=Captioned+Image" alt="Image with Caption" />
|
||||
</a>
|
||||
<figcaption>This is an example <a href="#caption">caption</a> for the image.</figcaption>
|
||||
</figure>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user