fix: update all test cases again

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-02-04 14:40:16 +01:00
parent c9b0b5aff3
commit 24163b02d1
44 changed files with 44 additions and 44 deletions

2
poetry.lock generated
View File

@ -887,7 +887,7 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]
type = "git" type = "git"
url = "ssh://git@github.com/DS4SD/docling-core.git" url = "ssh://git@github.com/DS4SD/docling-core.git"
reference = "cau/add-content-layer" reference = "cau/add-content-layer"
resolved_reference = "4c19ae7f7fb128a12173c50a9ab376114f70a689" resolved_reference = "ae3748b1f2e526698c1ed136cec7dc0502f28209"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "elife-56337", "name": "elife-56337",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "example_01", "name": "example_01",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "example_02", "name": "example_02",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "example_03", "name": "example_03",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "example_04", "name": "example_04",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "example_05", "name": "example_05",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "ipa20180000016.xml", "name": "ipa20180000016.xml",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "ipa20200022300.xml", "name": "ipa20200022300.xml",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "lorem_ipsum", "name": "lorem_ipsum",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "pa20010031492.xml", "name": "pa20010031492.xml",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "pftaps057006474.txt", "name": "pftaps057006474.txt",
"origin": { "origin": {
"mimetype": "text/plain", "mimetype": "text/plain",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "pg06442728.xml", "name": "pg06442728.xml",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "pntd.0008301", "name": "pntd.0008301",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "pone.0234687", "name": "pone.0234687",
"origin": { "origin": {
"mimetype": "application/xml", "mimetype": "application/xml",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "powerpoint_sample", "name": "powerpoint_sample",
"origin": { "origin": {
"mimetype": "application/vnd.ms-powerpoint", "mimetype": "application/vnd.ms-powerpoint",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "powerpoint_with_image", "name": "powerpoint_with_image",
"origin": { "origin": {
"mimetype": "application/vnd.ms-powerpoint", "mimetype": "application/vnd.ms-powerpoint",

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "tablecell", "name": "tablecell",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "test-01", "name": "test-01",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "test_emf_docx", "name": "test_emf_docx",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "unit_test_01", "name": "unit_test_01",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "unit_test_headers", "name": "unit_test_headers",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "unit_test_headers_numbered", "name": "unit_test_headers_numbered",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "unit_test_lists", "name": "unit_test_lists",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "wiki_duck", "name": "wiki_duck",
"origin": { "origin": {
"mimetype": "text/html", "mimetype": "text/html",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "word_sample", "name": "word_sample",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1,6 +1,6 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.0.0", "version": "1.1.0",
"name": "word_tables", "name": "word_tables",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",

View File

@ -1 +1 @@
{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}} {"schema_name": "DoclingDocument", "version": "1.1.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}