feat: Add content_layer property to items to address body, furniture and other roles (#735)

* feat: Pass predicted page-headers and page-footers through to DoclingDocument furniture

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* chore: Update all test GT

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: update all test cases

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: update all test cases again

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lock

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lock to final docling-core

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-02-10 12:07:49 +01:00
committed by GitHub
parent 3e26597995
commit cf78d5b7b9
43 changed files with 2082 additions and 198 deletions

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.0.0",
"version": "1.1.0",
"name": "unit_test_headers",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -10,6 +10,7 @@
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
@@ -20,6 +21,7 @@
"$ref": "#/texts/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
@@ -34,6 +36,7 @@
"$ref": "#/texts/33"
}
],
"content_layer": "body",
"name": "header-2",
"label": "section"
}
@@ -55,6 +58,7 @@
"$ref": "#/texts/27"
}
],
"content_layer": "body",
"label": "title",
"prov": [],
"orig": "Test Document",
@@ -66,6 +70,7 @@
"$ref": "#/texts/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -99,6 +104,7 @@
"$ref": "#/texts/14"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 1",
@@ -111,6 +117,7 @@
"$ref": "#/texts/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -122,6 +129,7 @@
"$ref": "#/texts/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1",
@@ -133,6 +141,7 @@
"$ref": "#/texts/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -144,6 +153,7 @@
"$ref": "#/texts/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2",
@@ -155,6 +165,7 @@
"$ref": "#/texts/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -182,6 +193,7 @@
"$ref": "#/texts/13"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 1.1",
@@ -194,6 +206,7 @@
"$ref": "#/texts/8"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -205,6 +218,7 @@
"$ref": "#/texts/8"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
@@ -216,6 +230,7 @@
"$ref": "#/texts/8"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -227,6 +242,7 @@
"$ref": "#/texts/8"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
@@ -238,6 +254,7 @@
"$ref": "#/texts/8"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -268,6 +285,7 @@
"$ref": "#/texts/20"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 1.2",
@@ -280,6 +298,7 @@
"$ref": "#/texts/14"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -291,6 +310,7 @@
"$ref": "#/texts/14"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
@@ -302,6 +322,7 @@
"$ref": "#/texts/14"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -313,6 +334,7 @@
"$ref": "#/texts/14"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
@@ -324,6 +346,7 @@
"$ref": "#/texts/14"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -354,6 +377,7 @@
"$ref": "#/texts/26"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 1.2.3",
@@ -366,6 +390,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -377,6 +402,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
@@ -388,6 +414,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -399,6 +426,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
@@ -410,6 +438,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -421,6 +450,7 @@
"$ref": "#/texts/20"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -454,6 +484,7 @@
"$ref": "#/texts/39"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 2",
@@ -466,6 +497,7 @@
"$ref": "#/texts/27"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -477,6 +509,7 @@
"$ref": "#/texts/27"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1",
@@ -488,6 +521,7 @@
"$ref": "#/texts/27"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -499,6 +533,7 @@
"$ref": "#/texts/27"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.2",
@@ -510,6 +545,7 @@
"$ref": "#/texts/27"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -537,6 +573,7 @@
"$ref": "#/texts/38"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 2.1.1",
@@ -549,6 +586,7 @@
"$ref": "#/texts/33"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -560,6 +598,7 @@
"$ref": "#/texts/33"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
@@ -571,6 +610,7 @@
"$ref": "#/texts/33"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -582,6 +622,7 @@
"$ref": "#/texts/33"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
@@ -593,6 +634,7 @@
"$ref": "#/texts/33"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -623,6 +665,7 @@
"$ref": "#/texts/45"
}
],
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Section 2.1",
@@ -635,6 +678,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -646,6 +690,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1",
@@ -657,6 +702,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -668,6 +714,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.2",
@@ -679,6 +726,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
@@ -690,6 +738,7 @@
"$ref": "#/texts/39"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",