chore: format JSON test files to enable comparison (#1511)

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas
2025-05-02 11:52:18 +03:00
committed by GitHub
parent b147331f2a
commit de56523974
49 changed files with 2373840 additions and 52 deletions

View File

@@ -1 +1,83 @@
{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test.pdf", "filename-prov": null, "document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [69.0, 688.5883585611979, 506.6666666666667, 767.2550252278646], "page": 1, "span": [0, 94], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
{
"_name": "",
"type": "pdf-document",
"description": {
"title": null,
"abstract": null,
"authors": null,
"affiliations": null,
"subjects": null,
"keywords": null,
"publication_date": null,
"languages": null,
"license": null,
"publishers": null,
"url_refs": null,
"references": null,
"publication": null,
"reference_count": null,
"citation_count": null,
"citation_date": null,
"advanced": null,
"analytics": null,
"logs": [],
"collection": null,
"acquisition": null
},
"file-info": {
"filename": "ocr_test.pdf",
"filename-prov": null,
"document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61",
"#-pages": 1,
"collection-name": null,
"description": null,
"page-hashes": [
{
"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3",
"model": "default",
"page": 1
}
]
},
"main-text": [
{
"prov": [
{
"bbox": [
69.0,
688.5883585611979,
506.6666666666667,
767.2550252278646
],
"page": 1,
"span": [
0,
94
],
"__ref_s3_data": null
}
],
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
}
],
"figures": [],
"tables": [],
"bitmaps": null,
"equations": [],
"footnotes": [],
"page-dimensions": [
{
"height": 841.9216918945312,
"page": 1,
"width": 595.201171875
}
],
"page-footers": [],
"page-headers": [],
"_s3_data": null,
"identifiers": null
}

File diff suppressed because one or more lines are too long