mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(html): handle address, details, and summary tags (#1436)
* fix(html): handle 'address' tag Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> * fix(html): handle 'details' tag Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> --------- Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
c2470ed216
commit
ed20124544
@@ -4,4 +4,7 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-3 at level 1: text: This is a regular paragraph.
|
||||
item-4 at level 1: text: This is a third div
|
||||
with a new line.
|
||||
item-5 at level 1: text: This is a fourth div with a bold paragraph.
|
||||
item-5 at level 1: section: group details
|
||||
item-6 at level 2: text: Heading for the details element
|
||||
item-7 at level 2: text: Description of the details element.
|
||||
item-8 at level 1: text: This is a fourth div with a bold paragraph.
|
||||
@@ -4,7 +4,7 @@
|
||||
"name": "example_06",
|
||||
"origin": {
|
||||
"mimetype": "text/html",
|
||||
"binary_hash": 14574683870626799530,
|
||||
"binary_hash": 10224930410364781672,
|
||||
"filename": "example_06.html"
|
||||
},
|
||||
"furniture": {
|
||||
@@ -30,14 +30,35 @@
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [],
|
||||
"groups": [
|
||||
{
|
||||
"self_ref": "#/groups/0",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "details",
|
||||
"label": "section"
|
||||
}
|
||||
],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
@@ -89,6 +110,30 @@
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Heading for the details element",
|
||||
"text": "Heading for the details element"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Description of the details element.",
|
||||
"text": "Description of the details element."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
|
||||
@@ -7,4 +7,8 @@ This is a regular paragraph.
|
||||
This is a third div
|
||||
with a new line.
|
||||
|
||||
Heading for the details element
|
||||
|
||||
Description of the details element.
|
||||
|
||||
This is a fourth div with a bold paragraph.
|
||||
@@ -7,6 +7,10 @@
|
||||
<div>This is another div with text.</div>
|
||||
<p>This is a regular paragraph.</p>
|
||||
<div>This is a third div<br/>with a new line.</div>
|
||||
<details>
|
||||
<summary>Heading for the details element</summary>
|
||||
<p>Description of the details element.</p>
|
||||
</details>
|
||||
<div><p>This is a fourth div with a <b>bold</b> paragraph.</p></div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Reference in New Issue
Block a user