mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
* fix click dependency and update lock file Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update test GT Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
286 lines
8.7 KiB
JSON
286 lines
8.7 KiB
JSON
{
|
|
"_name": "",
|
|
"type": "pdf-document",
|
|
"description": {
|
|
"title": null,
|
|
"abstract": null,
|
|
"authors": null,
|
|
"affiliations": null,
|
|
"subjects": null,
|
|
"keywords": null,
|
|
"publication_date": null,
|
|
"languages": null,
|
|
"license": null,
|
|
"publishers": null,
|
|
"url_refs": null,
|
|
"references": null,
|
|
"publication": null,
|
|
"reference_count": null,
|
|
"citation_count": null,
|
|
"citation_date": null,
|
|
"advanced": null,
|
|
"analytics": null,
|
|
"logs": [],
|
|
"collection": null,
|
|
"acquisition": null
|
|
},
|
|
"file-info": {
|
|
"filename": "picture_classification.pdf",
|
|
"filename-prov": null,
|
|
"document-hash": "959854dff729acaa22404d629a45cefcad8d942e595961185fc03a80d9fcc3a1",
|
|
"#-pages": 2,
|
|
"collection-name": null,
|
|
"description": null,
|
|
"page-hashes": [
|
|
{
|
|
"hash": "d9e3fc1226356b30c66012f05ad14089b00c59ea129195cd6ff8a0c68bda6f39",
|
|
"model": "default",
|
|
"page": 1
|
|
},
|
|
{
|
|
"hash": "9386884e13a97ce9662210a7e4258bbbb4f2e0e00663636160918e55b2806575",
|
|
"model": "default",
|
|
"page": 2
|
|
}
|
|
]
|
|
},
|
|
"main-text": [
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
133.76801,
|
|
654.45184,
|
|
252.35513,
|
|
667.19122
|
|
],
|
|
"page": 1,
|
|
"span": [
|
|
0,
|
|
15
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Figures Example",
|
|
"type": "subtitle-level-1",
|
|
"payload": null,
|
|
"name": "Section-header",
|
|
"font": null
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
133.76801,
|
|
501.97412,
|
|
477.48276,
|
|
642.32806
|
|
],
|
|
"page": 1,
|
|
"span": [
|
|
0,
|
|
887
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.",
|
|
"type": "paragraph",
|
|
"payload": null,
|
|
"name": "Text",
|
|
"font": null
|
|
},
|
|
{
|
|
"name": "Picture",
|
|
"type": "figure",
|
|
"$ref": "#/figures/0"
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
226.89101,
|
|
254.01826000000005,
|
|
384.3548,
|
|
262.86505
|
|
],
|
|
"page": 1,
|
|
"span": [
|
|
0,
|
|
35
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Figure 1: This is an example image.",
|
|
"type": "caption",
|
|
"payload": null,
|
|
"name": "Caption",
|
|
"font": null
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
133.76801,
|
|
122.51225,
|
|
477.48172000000005,
|
|
238.95505000000003
|
|
],
|
|
"page": 1,
|
|
"span": [
|
|
0,
|
|
747
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.",
|
|
"type": "paragraph",
|
|
"payload": null,
|
|
"name": "Text",
|
|
"font": null
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
133.76801,
|
|
523.7951,
|
|
477.48172000000005,
|
|
664.1490499999999
|
|
],
|
|
"page": 2,
|
|
"span": [
|
|
0,
|
|
887
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.",
|
|
"type": "paragraph",
|
|
"payload": null,
|
|
"name": "Text",
|
|
"font": null
|
|
},
|
|
{
|
|
"name": "Picture",
|
|
"type": "figure",
|
|
"$ref": "#/figures/1"
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
226.89101,
|
|
259.94226000000003,
|
|
384.3548,
|
|
268.78903
|
|
],
|
|
"page": 2,
|
|
"span": [
|
|
0,
|
|
35
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Figure 2: This is an example image.",
|
|
"type": "caption",
|
|
"payload": null,
|
|
"name": "Caption",
|
|
"font": null
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
133.76801,
|
|
117.32024000000001,
|
|
477.48172000000005,
|
|
245.71804999999995
|
|
],
|
|
"page": 2,
|
|
"span": [
|
|
0,
|
|
804
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum.",
|
|
"type": "paragraph",
|
|
"payload": null,
|
|
"name": "Text",
|
|
"font": null
|
|
}
|
|
],
|
|
"figures": [
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
134.9200439453125,
|
|
281.78173828125,
|
|
475.6635437011719,
|
|
487.109375
|
|
],
|
|
"page": 1,
|
|
"span": [
|
|
0,
|
|
35
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Figure 1: This is an example image.",
|
|
"type": "figure",
|
|
"payload": null,
|
|
"bounding-box": null
|
|
},
|
|
{
|
|
"prov": [
|
|
{
|
|
"bbox": [
|
|
218.8155517578125,
|
|
283.10589599609375,
|
|
391.96246337890625,
|
|
513.9846496582031
|
|
],
|
|
"page": 2,
|
|
"span": [
|
|
0,
|
|
35
|
|
],
|
|
"__ref_s3_data": null
|
|
}
|
|
],
|
|
"text": "Figure 2: This is an example image.",
|
|
"type": "figure",
|
|
"payload": null,
|
|
"bounding-box": null
|
|
}
|
|
],
|
|
"tables": [],
|
|
"bitmaps": null,
|
|
"equations": [],
|
|
"footnotes": [],
|
|
"page-dimensions": [
|
|
{
|
|
"height": 792.0,
|
|
"page": 1,
|
|
"width": 612.0
|
|
},
|
|
{
|
|
"height": 792.0,
|
|
"page": 2,
|
|
"width": 612.0
|
|
}
|
|
],
|
|
"page-footers": [],
|
|
"page-headers": [],
|
|
"_s3_data": null,
|
|
"identifiers": null
|
|
} |