mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
chore: format JSON test files to enable comparison (#1511)
Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
@@ -1 +1,83 @@
|
||||
{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test.pdf", "filename-prov": null, "document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [69.0, 688.5883585611979, 506.6666666666667, 767.2550252278646], "page": 1, "span": [0, 94], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
|
||||
{
|
||||
"_name": "",
|
||||
"type": "pdf-document",
|
||||
"description": {
|
||||
"title": null,
|
||||
"abstract": null,
|
||||
"authors": null,
|
||||
"affiliations": null,
|
||||
"subjects": null,
|
||||
"keywords": null,
|
||||
"publication_date": null,
|
||||
"languages": null,
|
||||
"license": null,
|
||||
"publishers": null,
|
||||
"url_refs": null,
|
||||
"references": null,
|
||||
"publication": null,
|
||||
"reference_count": null,
|
||||
"citation_count": null,
|
||||
"citation_date": null,
|
||||
"advanced": null,
|
||||
"analytics": null,
|
||||
"logs": [],
|
||||
"collection": null,
|
||||
"acquisition": null
|
||||
},
|
||||
"file-info": {
|
||||
"filename": "ocr_test.pdf",
|
||||
"filename-prov": null,
|
||||
"document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61",
|
||||
"#-pages": 1,
|
||||
"collection-name": null,
|
||||
"description": null,
|
||||
"page-hashes": [
|
||||
{
|
||||
"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3",
|
||||
"model": "default",
|
||||
"page": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"main-text": [
|
||||
{
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
69.0,
|
||||
688.5883585611979,
|
||||
506.6666666666667,
|
||||
767.2550252278646
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
0,
|
||||
94
|
||||
],
|
||||
"__ref_s3_data": null
|
||||
}
|
||||
],
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
|
||||
"type": "paragraph",
|
||||
"payload": null,
|
||||
"name": "Text",
|
||||
"font": null
|
||||
}
|
||||
],
|
||||
"figures": [],
|
||||
"tables": [],
|
||||
"bitmaps": null,
|
||||
"equations": [],
|
||||
"footnotes": [],
|
||||
"page-dimensions": [
|
||||
{
|
||||
"height": 841.9216918945312,
|
||||
"page": 1,
|
||||
"width": 595.201171875
|
||||
}
|
||||
],
|
||||
"page-footers": [],
|
||||
"page-headers": [],
|
||||
"_s3_data": null,
|
||||
"identifiers": null
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1 +1,77 @@
|
||||
{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.0, "t": 767.2550252278646, "r": 506.6666666666667, "b": 688.5883585611979, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.3.0",
|
||||
"name": "ocr_test",
|
||||
"origin": {
|
||||
"mimetype": "application/pdf",
|
||||
"binary_hash": 14853448746796404529,
|
||||
"filename": "ocr_test.pdf",
|
||||
"uri": null
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"parent": null,
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"cref": "#/texts/0"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 69.0,
|
||||
"t": 767.2550252278646,
|
||||
"r": 506.6666666666667,
|
||||
"b": 688.5883585611979,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
94
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {
|
||||
"1": {
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"image": null,
|
||||
"page_no": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user