docling/tests/data/groundtruth/docling_v2/mixed_without_h1.md.yaml
Cesar Berrospi Ramis 82940c47a6 fix(markdown): ensure correct parsing of nested lists
Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-07-25 13:53:26 +02:00

140 lines
2.5 KiB
YAML
Vendored

body:
children:
- $ref: '#/texts/0'
- $ref: '#/texts/1'
- $ref: '#/groups/0'
content_layer: body
label: unspecified
name: _root_
self_ref: '#/body'
form_items: []
furniture:
children: []
content_layer: furniture
label: unspecified
name: _root_
self_ref: '#/furniture'
groups:
- children:
- $ref: '#/texts/2'
content_layer: body
label: section
name: header-1
parent:
$ref: '#/body'
self_ref: '#/groups/0'
- children:
- $ref: '#/texts/3'
- $ref: '#/texts/5'
- $ref: '#/texts/6'
content_layer: body
label: list
name: list
parent:
$ref: '#/texts/2'
self_ref: '#/groups/1'
- children:
- $ref: '#/texts/4'
content_layer: body
label: list
name: list
parent:
$ref: '#/texts/3'
self_ref: '#/groups/2'
key_value_items: []
name: mixed_without_h1
origin:
binary_hash: 7394721163373597328
filename: mixed_without_h1.md
mimetype: text/html
pages: {}
pictures: []
schema_name: DoclingDocument
tables: []
texts:
- children: []
content_layer: furniture
label: title
orig: mixed_without_h1
parent:
$ref: '#/body'
prov: []
self_ref: '#/texts/0'
text: mixed_without_h1
- children: []
content_layer: furniture
label: text
orig: Content before first heading
parent:
$ref: '#/body'
prov: []
self_ref: '#/texts/1'
text: Content before first heading
- children:
- $ref: '#/groups/1'
- $ref: '#/texts/7'
content_layer: body
label: section_header
level: 1
orig: Some heading
parent:
$ref: '#/groups/0'
prov: []
self_ref: '#/texts/2'
text: Some heading
- children:
- $ref: '#/groups/2'
content_layer: body
enumerated: false
label: list_item
marker: ''
orig: A. first
parent:
$ref: '#/groups/1'
prov: []
self_ref: '#/texts/3'
text: A. first
- children: []
content_layer: body
enumerated: false
label: list_item
marker: ''
orig: subitem
parent:
$ref: '#/groups/2'
prov: []
self_ref: '#/texts/4'
text: subitem
- children: []
content_layer: body
enumerated: false
label: list_item
marker: ''
orig: B. second
parent:
$ref: '#/groups/1'
prov: []
self_ref: '#/texts/5'
text: B. second
- children: []
content_layer: body
enumerated: false
label: list_item
marker: ''
orig: 2 . strange
parent:
$ref: '#/groups/1'
prov: []
self_ref: '#/texts/6'
text: 2 . strange
- children: []
content_layer: body
label: text
orig: The end!
parent:
$ref: '#/texts/2'
prov: []
self_ref: '#/texts/7'
text: The end!
version: 1.5.0