feat: leverage new list modeling, capture default markers (#1856)

* chore: update docling-core & regenerate test data

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* update backends to leverage new list modeling

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* repin docling-core

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* ensure availability of latest docling-core API

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas
2025-06-27 16:37:15 +02:00
committed by GitHub
parent e79e4f0ab6
commit 0533da1923
90 changed files with 2252 additions and 2240 deletions

View File

@@ -1,10 +1,10 @@
{
"schema_name": "DoclingDocument",
"version": "1.4.0",
"version": "1.5.0",
"name": "example_01",
"origin": {
"mimetype": "text/html",
"binary_hash": 13782069548509991617,
"binary_hash": 13726679883013609282,
"filename": "example_01.html"
},
"furniture": {
@@ -58,7 +58,24 @@
],
"content_layer": "body",
"name": "ordered list",
"label": "ordered_list"
"label": "list"
},
{
"self_ref": "#/groups/2",
"parent": {
"$ref": "#/texts/2"
},
"children": [
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
}
],
"content_layer": "body",
"name": "ordered list start 42",
"label": "list"
}
],
"texts": [
@@ -110,6 +127,9 @@
},
{
"$ref": "#/groups/1"
},
{
"$ref": "#/groups/2"
}
],
"content_layer": "body",
@@ -143,7 +163,7 @@
"orig": "First item in unordered list",
"text": "First item in unordered list",
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/5",
@@ -157,7 +177,7 @@
"orig": "Second item in unordered list",
"text": "Second item in unordered list",
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/6",
@@ -171,7 +191,7 @@
"orig": "First item in ordered list",
"text": "First item in ordered list",
"enumerated": true,
"marker": "1."
"marker": ""
},
{
"self_ref": "#/texts/7",
@@ -185,7 +205,35 @@
"orig": "Second item in ordered list",
"text": "Second item in ordered list",
"enumerated": true,
"marker": "2."
"marker": ""
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "First item in ordered list with start",
"text": "First item in ordered list with start",
"enumerated": true,
"marker": "42."
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Second item in ordered list with start",
"text": "Second item in ordered list with start",
"enumerated": true,
"marker": "43."
}
],
"pictures": [