mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
fix(html): tackle paragraphs with block-level elements (#2720)
Handle p elements that contain block-level elements anywhere inside them, the way browsers do. Fix wrong type annotations. Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
aebe25cf00
commit
d007ba0e6f
99
tests/data/html/html_heading_in_p.html
vendored
Normal file
99
tests/data/html/html_heading_in_p.html
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Headings inside paragraphs in HTML</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>
|
||||
<h1 id="1"><span style="color: rgb(0,0,0);">1</span></h1>
|
||||
<div>
|
||||
<p>1st paragraph<br /></p>
|
||||
<div>
|
||||
<table>
|
||||
<colgroup>
|
||||
<col />
|
||||
<col />
|
||||
<col />
|
||||
</colgroup>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<p><strong><span style="color: rgb(0,0,0);">2</span></strong></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><strong><span style="color: rgb(0,0,0);">3</span></strong></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><strong><span style="color: rgb(0,0,0);">4</span></strong></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">5</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">6</span>
|
||||
</p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">7</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">8</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">9</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">10</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">11</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">12</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">13</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p><span style="color: rgb(255,0,255);">14</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">15</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">16</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
<p><span style="color: rgb(255,0,255);">17</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">18</span></p>
|
||||
</td>
|
||||
<td>
|
||||
<p><span style="color: rgb(0,0,0);">19</span></p>
|
||||
<p><span style="color: rgb(0,0,0);">20</span></p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<h1 id="21"><span style="color: rgb(0,0,0);">21</span></h1>
|
||||
</div>
|
||||
<br /></p>
|
||||
<p>2nd paragraph</p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
Reference in New Issue
Block a user