mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
fix: update all test cases again
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
c9b0b5aff3
commit
24163b02d1
2
poetry.lock
generated
2
poetry.lock
generated
@ -887,7 +887,7 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]
|
|||||||
type = "git"
|
type = "git"
|
||||||
url = "ssh://git@github.com/DS4SD/docling-core.git"
|
url = "ssh://git@github.com/DS4SD/docling-core.git"
|
||||||
reference = "cau/add-content-layer"
|
reference = "cau/add-content-layer"
|
||||||
resolved_reference = "4c19ae7f7fb128a12173c50a9ab376114f70a689"
|
resolved_reference = "ae3748b1f2e526698c1ed136cec7dc0502f28209"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-ibm-models"
|
name = "docling-ibm-models"
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "elife-56337",
|
"name": "elife-56337",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "example_01",
|
"name": "example_01",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "example_02",
|
"name": "example_02",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "example_03",
|
"name": "example_03",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "example_04",
|
"name": "example_04",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "example_05",
|
"name": "example_05",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "ipa20180000016.xml",
|
"name": "ipa20180000016.xml",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "ipa20200022300.xml",
|
"name": "ipa20200022300.xml",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "lorem_ipsum",
|
"name": "lorem_ipsum",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "pa20010031492.xml",
|
"name": "pa20010031492.xml",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "pftaps057006474.txt",
|
"name": "pftaps057006474.txt",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/plain",
|
"mimetype": "text/plain",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "pg06442728.xml",
|
"name": "pg06442728.xml",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "pntd.0008301",
|
"name": "pntd.0008301",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "pone.0234687",
|
"name": "pone.0234687",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/xml",
|
"mimetype": "application/xml",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "powerpoint_sample",
|
"name": "powerpoint_sample",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.ms-powerpoint",
|
"mimetype": "application/vnd.ms-powerpoint",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "powerpoint_with_image",
|
"name": "powerpoint_with_image",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.ms-powerpoint",
|
"mimetype": "application/vnd.ms-powerpoint",
|
||||||
|
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "tablecell",
|
"name": "tablecell",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "test-01",
|
"name": "test-01",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
"mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "test_emf_docx",
|
"name": "test_emf_docx",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "unit_test_01",
|
"name": "unit_test_01",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "unit_test_headers",
|
"name": "unit_test_headers",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "unit_test_headers_numbered",
|
"name": "unit_test_headers_numbered",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "unit_test_lists",
|
"name": "unit_test_lists",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "wiki_duck",
|
"name": "wiki_duck",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "text/html",
|
"mimetype": "text/html",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "word_sample",
|
"name": "word_sample",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"name": "word_tables",
|
"name": "word_tables",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
@ -1 +1 @@
|
|||||||
{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
|
{"schema_name": "DoclingDocument", "version": "1.1.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
|
Loading…
Reference in New Issue
Block a user