docling/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json
Christoph Auer 31c86613e5 Fix streams
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-03-11 16:27:10 +01:00

1 line
4.5 KiB
JSON

{"schema_name": "DoclingDocument", "version": "1.2.0", "name": "2305.03393v1-pg9", "origin": {"mimetype": "application/pdf", "binary_hash": 3463920545297462180, "filename": "2305.03393v1-pg9.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}, {"cref": "#/texts/2"}, {"cref": "#/texts/3"}, {"cref": "#/texts/4"}, {"cref": "#/tables/0"}, {"cref": "#/texts/6"}, {"cref": "#/texts/7"}, {"cref": "#/texts/8"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "furniture", "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 194.478, "t": 698.337, "r": 447.545, "b": 689.963, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 60]}], "orig": "Optimized Table Tokenization for Table Structure Recognition", "text": "Optimized Table Tokenization for Table Structure Recognition"}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "content_layer": "furniture", "label": "page_header", "prov": [{"page_no": 1, "bbox": {"l": 475.984, "t": 698.337, "r": 480.593, "b": 689.963, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "9", "text": "9"}, {"self_ref": "#/texts/2", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 673.126, "r": 480.591, "b": 663.83, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 76]}], "orig": "order to compute the TED score. Inference timing results for all experiments", "text": "order to compute the TED score. Inference timing results for all experiments"}, {"self_ref": "#/texts/3", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 622.844, "r": 149.403, "b": 613.617, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "5.1", "text": "5.1", "level": 1}, {"self_ref": "#/texts/4", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 606.474, "r": 480.582, "b": 597.178, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 73]}], "orig": "We have chosen the PubTabNet data set to perform HPO, since it includes a", "text": "We have chosen the PubTabNet data set to perform HPO, since it includes a"}, {"self_ref": "#/texts/5", "parent": {"cref": "#/tables/0"}, "children": [], "content_layer": "body", "label": "caption", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 518.264, "r": 160.118, "b": 508.127, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 5]}], "orig": "Table", "text": "Table"}, {"self_ref": "#/texts/6", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 283.878, "r": 149.403, "b": 274.65100000000007, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 3]}], "orig": "5.2", "text": "5.2", "level": 1}, {"self_ref": "#/texts/7", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 134.765, "t": 267.509, "r": 480.562, "b": 258.21299999999997, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 77]}], "orig": "We picked the model parameter configuration that produced the best prediction", "text": "We picked the model parameter configuration that produced the best prediction"}, {"self_ref": "#/texts/8", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 149.709, "t": 171.86699999999996, "r": 205.6, "b": 162.57100000000003, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 13]}], "orig": "Additionally,", "text": "Additionally,"}], "pictures": [], "tables": [{"self_ref": "#/tables/0", "parent": {"cref": "#/body"}, "children": [{"cref": "#/texts/5"}], "content_layer": "body", "label": "table", "prov": [{"page_no": 1, "bbox": {"l": 139.66741943359375, "t": 454.45458984375, "r": 475.00927734375, "b": 322.5054626464844, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 0]}], "captions": [{"cref": "#/texts/5"}], "references": [], "footnotes": [], "image": null, "data": {"table_cells": [], "num_rows": 0, "num_cols": 0, "grid": []}}], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 1}}}