mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(markdown): add support for HTML content (#855)
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
25
tests/data/groundtruth/docling_v2/mixed.md.md
Normal file
25
tests/data/groundtruth/docling_v2/mixed.md.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Title
|
||||
|
||||
Some text
|
||||
|
||||
## Famous ducks
|
||||
|
||||
Here is a table:
|
||||
|
||||
| Character | Name in German | Name in French | Name in Italian |
|
||||
|----------------|------------------|------------------|-------------------|
|
||||
| Scrooge McDuck | Dagobert Duck | Balthazar Picsou | Paperone |
|
||||
| Huey | Tick | Riri | Qui |
|
||||
| Dewey | Trick | Fifi | Quo |
|
||||
| Louie | Track | Loulou | Qua |
|
||||
|
||||
And here is more HTML:
|
||||
|
||||
Some paragraph.
|
||||
|
||||
Now a div — almost there...
|
||||
|
||||
- foo
|
||||
- bar
|
||||
|
||||
The end!
|
||||
@@ -53,6 +53,20 @@
|
||||
table tr:nth-child(even) td{
|
||||
background-color: LightGray;
|
||||
}
|
||||
math annotation {
|
||||
display: none;
|
||||
}
|
||||
.formula-not-decoded {
|
||||
background: repeating-linear-gradient(
|
||||
45deg, /* Angle of the stripes */
|
||||
LightGray, /* First color */
|
||||
LightGray 10px, /* Length of the first color */
|
||||
White 10px, /* Second color */
|
||||
White 20px /* Length of the second color */
|
||||
);
|
||||
margin: 0;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<h2>Test with tables</h2>
|
||||
|
||||
54
tests/data/md/mixed.md
Normal file
54
tests/data/md/mixed.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Title
|
||||
|
||||
Some text
|
||||
|
||||
## Famous ducks
|
||||
|
||||
Here is a table:
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Character</th>
|
||||
<th>Name in German</th>
|
||||
<th>Name in French</th>
|
||||
<th>Name in Italian</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Scrooge McDuck</td>
|
||||
<td>Dagobert Duck</td>
|
||||
<td>Balthazar Picsou</td>
|
||||
<td>Paperone</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Huey</td>
|
||||
<td>Tick</td>
|
||||
<td>Riri</td>
|
||||
<td>Qui</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Dewey</td>
|
||||
<td>Trick</td>
|
||||
<td>Fifi</td>
|
||||
<td>Quo</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Louie</td>
|
||||
<td>Track</td>
|
||||
<td>Loulou</td>
|
||||
<td>Qua</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
And here is more HTML:
|
||||
|
||||
<p>Some paragraph.</p>
|
||||
|
||||
<div>
|
||||
<p>Now a div — almost there...</p>
|
||||
<ul>
|
||||
<li>foo</li>
|
||||
<li>bar</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
The end!
|
||||
Reference in New Issue
Block a user