docling/tests/data_scanned/groundtruth/docling_v2/ocr_test.json
Clément Doumouro 7b4a4457e8 fix(layout,table): update e2e test
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
2025-07-09 17:06:30 +02:00

429 lines
12 KiB
JSON
Vendored

{
"schema_name": "DoclingDocument",
"version": "1.5.0",
"name": "ocr_test",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 14846044078209721391,
"filename": "ocr_test.pdf"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/tables/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [],
"pictures": [],
"tables": [
{
"self_ref": "#/tables/0",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "table",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 69.05,
"t": 524.35,
"r": 551.1,
"b": 277.42,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
"data": {
"table_cells": [
{
"bbox": {
"l": 97.33,
"t": 105.67,
"r": 190.0,
"b": 126.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Vertically merged",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 121.67,
"t": 204.33,
"r": 168.67,
"b": 220.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 121.67,
"t": 284.0,
"r": 168.67,
"b": 300.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 232.67,
"t": 105.67,
"r": 364.0,
"b": 126.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Other merged column",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 247.0,
"t": 188.33,
"r": 349.67,
"b": 204.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Some other value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 247.0,
"t": 268.0,
"r": 349.67,
"b": 284.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Some other value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 406.33,
"t": 105.67,
"r": 518.33,
"b": 121.67,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another column",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 408.33,
"t": 188.33,
"r": 514.0,
"b": 204.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 408.33,
"t": 268.0,
"r": 514.0,
"b": 284.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another value",
"column_header": false,
"row_header": false,
"row_section": false
}
],
"num_rows": 3,
"num_cols": 3,
"grid": [
[
{
"bbox": {
"l": 97.33,
"t": 105.67,
"r": 190.0,
"b": 126.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Vertically merged",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 232.67,
"t": 105.67,
"r": 364.0,
"b": 126.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Other merged column",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 406.33,
"t": 105.67,
"r": 518.33,
"b": 121.67,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another column",
"column_header": true,
"row_header": false,
"row_section": false
}
],
[
{
"bbox": {
"l": 121.67,
"t": 204.33,
"r": 168.67,
"b": 220.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 247.0,
"t": 188.33,
"r": 349.67,
"b": 204.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Some other value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 408.33,
"t": 188.33,
"r": 514.0,
"b": 204.33,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another value",
"column_header": false,
"row_header": false,
"row_section": false
}
],
[
{
"bbox": {
"l": 121.67,
"t": 284.0,
"r": 168.67,
"b": 300.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 247.0,
"t": 268.0,
"r": 349.67,
"b": 284.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Some other value",
"column_header": false,
"row_header": false,
"row_section": false
},
{
"bbox": {
"l": 408.33,
"t": 268.0,
"r": 514.0,
"b": 284.0,
"coord_origin": "TOPLEFT"
},
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 2,
"end_row_offset_idx": 3,
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Yet another value",
"column_header": false,
"row_header": false,
"row_section": false
}
]
]
},
"annotations": []
}
],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 792.0,
"height": 612.0
},
"page_no": 1
}
}
}