From bba05d1c37c1b228a1e01a2d1d70f66aace63cef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cle=CC=81ment=20Doumouro?= Date: Wed, 9 Jul 2025 13:17:42 +0200 Subject: [PATCH] fix(layout,table): orientation-aware layout and table detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Clément Doumouro --- .../docling_v1/2305.03393v1-pg9.json | 6 +- .../docling_v1/2305.03393v1-pg9.pages.json | 54 +- .../docling_v2/webp-test.doctags.txt | 2 +- .../docling_v1/ocr_test.doctags.txt | 16 +- .../groundtruth/docling_v1/ocr_test.json | 204 +- .../docling_v1/ocr_test.pages.json | 3500 ++++++++- .../docling_v1/ocr_test_rotated_180.json | 291 +- .../ocr_test_rotated_180.pages.json | 6622 ++++++++++++++++- .../docling_v1/ocr_test_rotated_270.json | 107 +- .../ocr_test_rotated_270.pages.json | 4826 +++++++++++- .../docling_v1/ocr_test_rotated_90.pages.json | 3544 ++++++++- .../docling_v2/ocr_test.doctags.txt | 2 +- .../groundtruth/docling_v2/ocr_test.json | 580 +- .../docling_v2/ocr_test.pages.json | 4598 +++++++++++- .../ocr_test_rotated_180.doctags.txt | 3 +- .../docling_v2/ocr_test_rotated_180.json | 608 +- .../ocr_test_rotated_180.pages.json | 4661 +++++++++++- .../ocr_test_rotated_270.doctags.txt | 3 +- .../docling_v2/ocr_test_rotated_270.json | 610 +- .../ocr_test_rotated_270.pages.json | 4692 +++++++++++- .../ocr_test_rotated_90.doctags.txt | 3 +- .../docling_v2/ocr_test_rotated_90.json | 610 +- .../docling_v2/ocr_test_rotated_90.pages.json | 4692 +++++++++++- tests/test_e2e_ocr_conversion.py | 34 +- 24 files changed, 38224 insertions(+), 2044 deletions(-) diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json index dd51e390..58701d5d 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json @@ -213,10 +213,10 @@ "prov": [ { "bbox": [ - 139.66741943359375, + 139.66746520996094, 322.5054626464844, - 475.00927734375, - 454.45458984375 + 475.0093078613281, + 454.4546203613281 ], "page": 1, "span": [ diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json index 3010fbb6..114cbf31 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json @@ -2705,7 +2705,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -2745,7 +2745,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, @@ -2785,7 +2785,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -3155,7 +3155,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -3339,9 +3339,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -7846,7 +7846,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -8243,9 +8243,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -13641,7 +13641,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -13687,7 +13687,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, @@ -13733,7 +13733,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -14121,7 +14121,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -14311,9 +14311,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -19701,7 +19701,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -20116,7 +20116,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -20504,7 +20504,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -20694,9 +20694,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -26084,7 +26084,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -26499,7 +26499,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -26545,7 +26545,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt index 5682a134..76fe886d 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt @@ -1,2 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt index 927ba0f2..19f5c6aa 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt @@ -1,9 +1,11 @@ - - -Column 0Column 1Column 2 -this is row 0some cellshave contentand -and row 1otherhave -and last row 2nothinginside -
+This is a table test +The test starts with some random text and then a table image: +Some column +Some other column +Some row +some cell +have content +Some other row +other don't
\ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test.json index 8dbfff1f..20934507 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.json @@ -27,13 +27,13 @@ "file-info": { "filename": "ocr_test.pdf", "filename-prov": null, - "document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61", + "document-hash": "4220c26a23a085eeca7ed3904ae0952e7e73458e65ce19e56170a9ce095b2313", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3", + "hash": "07ff68c95cc6ec01fb38d02dc5d5efc466f3cfbf2e1dcb6c16b4e722d7f9f657", "model": "default", "page": 1 } @@ -44,20 +44,204 @@ "prov": [ { "bbox": [ - 69.6796630536824, - 689.0124221922704, - 504.8720051760782, - 764.9216921155637 + 201.26343, + 690.10254, + 417.96021, + 719.14941 ], "page": 1, "span": [ 0, - 94 + 20 ], "__ref_s3_data": null } ], - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", + "text": "This is a table test", + "type": "subtitle-level-1", + "payload": null, + "name": "Section-header", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 72.0, + 655.42273, + 376.27319, + 667.7117899999998 + ], + "page": 1, + "span": [ + 0, + 61 + ], + "__ref_s3_data": null + } + ], + "text": "The test starts with some random text and then a table image:", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 275.33333333333337, + 601.0, + 343.66666666666663, + 609.6666666666666 + ], + "page": 1, + "span": [ + 0, + 11 + ], + "__ref_s3_data": null + } + ], + "text": "Some column", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 381.3333333333333, + 601.0, + 479.3333333333333, + 609.6666666666666 + ], + "page": 1, + "span": [ + 0, + 17 + ], + "__ref_s3_data": null + } + ], + "text": "Some other column", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 175.0, + 554.6666666666667, + 225.66666666666669, + 563.3333333333333 + ], + "page": 1, + "span": [ + 0, + 8 + ], + "__ref_s3_data": null + } + ], + "text": "Some row", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 286.0, + 554.6666666666667, + 333.0, + 563.3333333333333 + ], + "page": 1, + "span": [ + 0, + 9 + ], + "__ref_s3_data": null + } + ], + "text": "some cell", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 398.3333333333333, + 554.6666666666667, + 463.0, + 563.3333333333333 + ], + "page": 1, + "span": [ + 0, + 12 + ], + "__ref_s3_data": null + } + ], + "text": "have content", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 160.33333333333334, + 508.33333333333337, + 240.33333333333331, + 517.0 + ], + "page": 1, + "span": [ + 0, + 14 + ], + "__ref_s3_data": null + } + ], + "text": "Some other row", + "type": "paragraph", + "payload": null, + "name": "Text", + "font": null + }, + { + "prov": [ + { + "bbox": [ + 283.0, + 508.33333333333337, + 336.33333333333337, + 517.0 + ], + "page": 1, + "span": [ + 0, + 11 + ], + "__ref_s3_data": null + } + ], + "text": "other don't", "type": "paragraph", "payload": null, "name": "Text", @@ -71,9 +255,9 @@ "footnotes": [], "page-dimensions": [ { - "height": 841.9216918945312, + "height": 792.0, "page": 1, - "width": 595.201171875 + "width": 612.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json index b53b75aa..8bfcaa25 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.201171875, - "height": 841.9216918945312 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 612.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 612.0, + "r_y2": 792.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 792.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 201.26343, + "r_y0": 101.89746000000002, + "r_x1": 417.96021, + "r_y1": 101.89746000000002, + "r_x2": 417.96021, + "r_y2": 72.85059000000001, + "r_x3": 201.26343, + "r_y3": 72.85059000000001, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 72.0, + "r_y0": 136.57727, + "r_x1": 376.27319, + "r_y1": 136.57727, + "r_x2": 376.27319, + "r_y2": 124.28821000000016, + "r_x3": 72.0, + "r_y3": 124.28821000000016, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "The test starts with some random text and then a table image: ", + "orig": "The test starts with some random text and then a table image: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,20 +119,395 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, "from_ocr": true } ], @@ -146,16 +521,16 @@ "layout": { "clusters": [ { - "id": 0, - "label": "text", + "id": 9, + "label": "section_header", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 201.26343, + "t": 72.85059000000001, + "r": 417.96021, + "b": 101.89746000000002, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.6777006387710571, "cells": [ { "index": 0, @@ -166,22 +541,37 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 201.26343, + "r_y0": 101.89746000000002, + "r_x1": 417.96021, + "r_y1": 101.89746000000002, + "r_x2": 417.96021, + "r_y2": 72.85059000000001, + "r_x3": 201.26343, + "r_y3": 72.85059000000001, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 72.0, + "t": 124.28821000000016, + "r": 376.27319, + "b": 136.57727, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8428522944450378, + "cells": [ { "index": 1, "rgba": { @@ -191,22 +581,37 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 72.0, + "r_y0": 136.57727, + "r_x1": 376.27319, + "r_y1": 136.57727, + "r_x2": 376.27319, + "r_y2": 124.28821000000016, + "r_x3": 72.0, + "r_y3": 124.28821000000016, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "The test starts with some random text and then a table image: ", + "orig": "The test starts with some random text and then a table image: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 8, + "label": "form", + "bbox": { + "l": 160.33333333333334, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7274590134620667, + "cells": [ { "index": 2, "rgba": { @@ -216,24 +621,905 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 275.33333333333337, + "t": 182.33333333333334, + "r": 343.66666666666663, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9056976437568665, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 381.3333333333333, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9007152318954468, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 175.0, + "t": 228.66666666666669, + "r": 225.66666666666669, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129480123519897, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 286.0, + "t": 228.66666666666669, + "r": 333.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9123309850692749, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 398.3333333333333, + "t": 228.66666666666669, + "r": 463.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8969476819038391, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 160.33333333333334, + "t": 275.0, + "r": 240.33333333333331, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129647612571716, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 283.0, + "t": 275.0, + "r": 336.33333333333337, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9102913737297058, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, @@ -247,20 +1533,20 @@ "assembled": { "elements": [ { - "label": "text", - "id": 0, + "label": "section_header", + "id": 9, "page_no": 0, "cluster": { - "id": 0, - "label": "text", + "id": 9, + "label": "section_header", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 201.26343, + "t": 72.85059000000001, + "r": 417.96021, + "b": 101.89746000000002, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.6777006387710571, "cells": [ { "index": 0, @@ -271,22 +1557,43 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 201.26343, + "r_y0": 101.89746000000002, + "r_x1": 417.96021, + "r_y1": 101.89746000000002, + "r_x2": 417.96021, + "r_y2": 72.85059000000001, + "r_x3": 201.26343, + "r_y3": 72.85059000000001, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "text", + "id": 7, + "page_no": 0, + "cluster": { + "id": 7, + "label": "text", + "bbox": { + "l": 72.0, + "t": 124.28821000000016, + "r": 376.27319, + "b": 136.57727, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8428522944450378, + "cells": [ { "index": 1, "rgba": { @@ -296,22 +1603,43 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 72.0, + "r_y0": 136.57727, + "r_x1": 376.27319, + "r_y1": 136.57727, + "r_x2": 376.27319, + "r_y2": 124.28821000000016, + "r_x3": 72.0, + "r_y3": 124.28821000000016, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "The test starts with some random text and then a table image: ", + "orig": "The test starts with some random text and then a table image: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "The test starts with some random text and then a table image:" + }, + { + "label": "form", + "id": 8, + "page_no": 0, + "cluster": { + "id": 8, + "label": "form", + "bbox": { + "l": 160.33333333333334, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7274590134620667, + "cells": [ { "index": 2, "rgba": { @@ -321,44 +1649,925 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 275.33333333333337, + "t": 182.33333333333334, + "r": 343.66666666666663, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9056976437568665, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 381.3333333333333, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9007152318954468, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 175.0, + "t": 228.66666666666669, + "r": 225.66666666666669, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129480123519897, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 286.0, + "t": 228.66666666666669, + "r": 333.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9123309850692749, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 398.3333333333333, + "t": 228.66666666666669, + "r": 463.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8969476819038391, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 160.33333333333334, + "t": 275.0, + "r": 240.33333333333331, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129647612571716, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 283.0, + "t": 275.0, + "r": 336.33333333333337, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9102913737297058, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null } ], "body": [ { - "label": "text", - "id": 0, + "label": "section_header", + "id": 9, "page_no": 0, "cluster": { - "id": 0, - "label": "text", + "id": 9, + "label": "section_header", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 201.26343, + "t": 72.85059000000001, + "r": 417.96021, + "b": 101.89746000000002, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.6777006387710571, "cells": [ { "index": 0, @@ -369,22 +2578,43 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 201.26343, + "r_y0": 101.89746000000002, + "r_x1": 417.96021, + "r_y1": 101.89746000000002, + "r_x2": 417.96021, + "r_y2": 72.85059000000001, + "r_x3": 201.26343, + "r_y3": 72.85059000000001, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "text", + "id": 7, + "page_no": 0, + "cluster": { + "id": 7, + "label": "text", + "bbox": { + "l": 72.0, + "t": 124.28821000000016, + "r": 376.27319, + "b": 136.57727, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8428522944450378, + "cells": [ { "index": 1, "rgba": { @@ -394,22 +2624,43 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 72.0, + "r_y0": 136.57727, + "r_x1": 376.27319, + "r_y1": 136.57727, + "r_x2": 376.27319, + "r_y2": 124.28821000000016, + "r_x3": 72.0, + "r_y3": 124.28821000000016, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "The test starts with some random text and then a table image: ", + "orig": "The test starts with some random text and then a table image: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "The test starts with some random text and then a table image:" + }, + { + "label": "form", + "id": 8, + "page_no": 0, + "cluster": { + "id": 8, + "label": "form", + "bbox": { + "l": 160.33333333333334, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7274590134620667, + "cells": [ { "index": 2, "rgba": { @@ -419,26 +2670,907 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 275.33333333333337, + "t": 182.33333333333334, + "r": 343.66666666666663, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9056976437568665, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 275.33333333333337, + "r_y0": 191.0, + "r_x1": 304.0, + "r_y1": 191.0, + "r_x2": 304.0, + "r_y2": 182.33333333333334, + "r_x3": 275.33333333333337, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9609484899999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 308.0, + "r_y0": 191.0, + "r_x1": 343.66666666666663, + "r_y1": 191.0, + "r_x2": 343.66666666666663, + "r_y2": 182.33333333333334, + "r_x3": 308.0, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95935837, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 381.3333333333333, + "t": 182.33333333333334, + "r": 479.3333333333333, + "b": 191.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9007152318954468, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 381.3333333333333, + "r_y0": 191.0, + "r_x1": 410.3333333333333, + "r_y1": 191.0, + "r_x2": 410.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 381.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95280136, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 414.3333333333333, + "r_y0": 191.0, + "r_x1": 440.3333333333333, + "r_y1": 191.0, + "r_x2": 440.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 414.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9649115, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 443.3333333333333, + "r_y0": 191.0, + "r_x1": 479.3333333333333, + "r_y1": 191.0, + "r_x2": 479.3333333333333, + "r_y2": 182.33333333333334, + "r_x3": 443.3333333333333, + "r_y3": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9639427899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 175.0, + "t": 228.66666666666669, + "r": 225.66666666666669, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129480123519897, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 175.0, + "r_y0": 237.33333333333331, + "r_x1": 204.0, + "r_y1": 237.33333333333331, + "r_x2": 204.0, + "r_y2": 228.66666666666669, + "r_x3": 175.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96050453, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 208.0, + "r_y0": 237.33333333333331, + "r_x1": 225.66666666666669, + "r_y1": 237.33333333333331, + "r_x2": 225.66666666666669, + "r_y2": 231.0, + "r_x3": 208.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9623416899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 286.0, + "t": 228.66666666666669, + "r": 333.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9123309850692749, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 286.0, + "r_y0": 237.33333333333331, + "r_x1": 313.0, + "r_y1": 237.33333333333331, + "r_x2": 313.0, + "r_y2": 231.0, + "r_x3": 286.0, + "r_y3": 231.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96279846, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 317.0, + "r_y0": 237.33333333333331, + "r_x1": 333.0, + "r_y1": 237.33333333333331, + "r_x2": 333.0, + "r_y2": 228.66666666666669, + "r_x3": 317.0, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.96231712, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 398.3333333333333, + "t": 228.66666666666669, + "r": 463.0, + "b": 237.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8969476819038391, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 398.3333333333333, + "r_y0": 237.33333333333331, + "r_x1": 422.0, + "r_y1": 237.33333333333331, + "r_x2": 422.0, + "r_y2": 228.66666666666669, + "r_x3": 398.3333333333333, + "r_y3": 228.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96670181, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 426.0, + "r_y0": 237.33333333333331, + "r_x1": 463.0, + "r_y1": 237.33333333333331, + "r_x2": 463.0, + "r_y2": 229.0, + "r_x3": 426.0, + "r_y3": 229.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.9589679700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 160.33333333333334, + "t": 275.0, + "r": 240.33333333333331, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9129647612571716, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 160.33333333333334, + "r_y0": 283.66666666666663, + "r_x1": 189.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 189.33333333333331, + "r_y2": 275.0, + "r_x3": 160.33333333333334, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95674171, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 193.0, + "r_y0": 283.66666666666663, + "r_x1": 219.0, + "r_y1": 283.66666666666663, + "r_x2": 219.0, + "r_y2": 275.0, + "r_x3": 193.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9601168099999999, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 222.66666666666669, + "r_y0": 283.66666666666663, + "r_x1": 240.33333333333331, + "r_y1": 283.66666666666663, + "r_x2": 240.33333333333331, + "r_y2": 277.33333333333337, + "r_x3": 222.66666666666669, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96364174, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 283.0, + "t": 275.0, + "r": 336.33333333333337, + "b": 283.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9102913737297058, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.0, + "r_y0": 283.66666666666663, + "r_x1": 309.0, + "r_y1": 283.66666666666663, + "r_x2": 309.0, + "r_y2": 275.0, + "r_x3": 283.0, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.33333333333337, + "r_y0": 283.66666666666663, + "r_x1": 336.33333333333337, + "r_y1": 283.66666666666663, + "r_x2": 336.33333333333337, + "r_y2": 275.0, + "r_x3": 312.33333333333337, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9636872099999999, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json index 8de137d4..07e64090 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json @@ -27,37 +27,42 @@ "file-info": { "filename": "ocr_test_rotated_180.pdf", "filename-prov": null, - "document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982", + "document-hash": "687553cff95da8e2898fa50a68986ee2a3735ba5d287615e03c0d40fd3b33758", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49", + "hash": "9e7213c0aa5ff85dfdb9a5b7566dfd229a4c5b8a4e289dd68655ddb1197c3b1f", "model": "default", "page": 1 } ] }, "main-text": [ + { + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + }, { "prov": [ { "bbox": [ - 441.2561096985719, - 131.89488404865142, - 522.0347860494834, - 151.87873262042876 + 238.78076, + 124.28821000000005, + 540.0, + 136.57727 ], "page": 1, "span": [ 0, - 7 + 71 ], "__ref_s3_data": null } ], - "text": "package", + "text": "ehT t se t w strats it modnar emos h t xe t dna t a neh t elba i egam :", "type": "paragraph", "payload": null, "name": "Text", @@ -67,20 +72,20 @@ "prov": [ { "bbox": [ - 89.23887497045128, - 77.02339852098021, - 523.208764293368, - 124.75312428291147 + 194.03979, + 72.85058600000002, + 410.73657, + 101.89746000000002 ], "page": 1, "span": [ 0, - 86 + 20 ], "__ref_s3_data": null } ], - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", + "text": "tset elbat a si sihT", "type": "paragraph", "payload": null, "name": "Text", @@ -88,15 +93,267 @@ } ], "figures": [], - "tables": [], + "tables": [ + { + "prov": [ + { + "bbox": [ + 112.69406127929688, + 163.70050048828125, + 470.0718078613281, + 302.27655029296875 + ], + "page": 1, + "span": [ + 0, + 0 + ], + "__ref_s3_data": null + } + ], + "text": "", + "type": "table", + "payload": null, + "#-cols": 3, + "#-rows": 3, + "data": [ + [ + { + "bbox": null, + "spans": [ + [ + 0, + 0 + ] + ], + "text": "", + "type": "body" + }, + { + "bbox": [ + 303.0, + 508.3333333333333, + 329.0, + 517.0 + ], + "spans": [ + [ + 0, + 1 + ] + ], + "text": "other don't", + "type": "col_header", + "col": 1, + "col-header": true, + "col-span": [ + 1, + 2 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 422.6666666666667, + 508.3333333333333, + 451.6666666666667, + 517.0 + ], + "spans": [ + [ + 0, + 2 + ] + ], + "text": "Some other row", + "type": "col_header", + "col": 2, + "col-header": true, + "col-span": [ + 2, + 3 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + } + ], + [ + { + "bbox": [ + 190.0, + 554.6666666666666, + 213.66666666666666, + 563.3333333333334 + ], + "spans": [ + [ + 1, + 0 + ] + ], + "text": "have content", + "type": "row_header", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 1, + "row-header": true, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 299.0, + 554.6666666666666, + 326.33333333333337, + 561.0 + ], + "spans": [ + [ + 1, + 1 + ] + ], + "text": "some cell", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 408.3333333333333, + 554.6666666666666, + 437.3333333333333, + 563.3333333333334 + ], + "spans": [ + [ + 1, + 2 + ] + ], + "text": "Some row", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + } + ], + [ + { + "bbox": [ + 201.66666666666669, + 601.0, + 230.66666666666666, + 609.6666666666666 + ], + "spans": [ + [ + 2, + 0 + ] + ], + "text": "Some other column", + "type": "row_header", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 2, + "row-header": true, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": [ + 308.0, + 601.0, + 337.0, + 609.6666666666666 + ], + "spans": [ + [ + 2, + 1 + ] + ], + "text": "Some column", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": null, + "spans": [ + [ + 2, + 2 + ] + ], + "text": "", + "type": "body" + } + ] + ], + "model": null, + "bounding-box": null + } + ], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 841.9216918945312, + "height": 792.0, "page": 1, - "width": 595.201171875 + "width": 612.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json index 962861d9..c8d38184 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.201171875, - "height": 841.9216918945312 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 612.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 612.0, + "r_y2": 792.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 792.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 194.03979, + "r_y0": 719.149414, + "r_x1": 410.73657, + "r_y1": 719.149414, + "r_x2": 410.73657, + "r_y2": 690.10254, + "r_x3": 194.03979, + "r_y3": 690.10254, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": " tset elbat a si sihT", + "orig": " tset elbat a si sihT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 521.0545, + "r_y0": 667.71179, + "r_x1": 540.0, + "r_y1": 667.71179, + "r_x2": 540.0, + "r_y2": 655.42273, + "r_x3": 521.0545, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "ehT", + "orig": "ehT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,20 +119,820 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 518.00269, + "r_y0": 667.71179, + "r_x1": 518.00488, + "r_y1": 667.71179, + "r_x2": 518.00488, + "r_y2": 655.42273, + "r_x3": 518.00269, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": " t", + "orig": " t", "text_direction": "left_to_right", "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 503.33759000000003, + "r_y0": 667.71179, + "r_x1": 514.95093, + "r_y1": 667.71179, + "r_x2": 514.95093, + "r_y2": 655.42273, + "r_x3": 503.33759000000003, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "se", + "orig": "se", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 500.28534, + "r_y0": 667.71179, + "r_x1": 500.28751, + "r_y1": 667.71179, + "r_x2": 500.28751, + "r_y2": 655.42273, + "r_x3": 500.28534, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.36172000000005, + "r_y0": 667.71179, + "r_x1": 497.23352, + "r_y1": 667.71179, + "r_x2": 497.23352, + "r_y2": 655.42273, + "r_x3": 459.36172000000005, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "w strats", + "orig": "w strats", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 456.92352, + "r_y0": 667.71179, + "r_x1": 456.92526, + "r_y1": 667.71179, + "r_x2": 456.92526, + "r_y2": 655.42273, + "r_x3": 456.92352, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "it", + "orig": "it", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 377.49374, + "r_y0": 667.71179, + "r_x1": 453.87128, + "r_y1": 667.71179, + "r_x2": 453.87128, + "r_y2": 655.42273, + "r_x3": 377.49374, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "modnar emos h", + "orig": "modnar emos h", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 374.44409, + "r_y0": 667.71179, + "r_x1": 374.44629, + "r_y1": 667.71179, + "r_x2": 374.44629, + "r_y2": 655.42273, + "r_x3": 374.44409, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 359.77896, + "r_y0": 667.71179, + "r_x1": 371.3923, + "r_y1": 667.71179, + "r_x2": 371.3923, + "r_y2": 655.42273, + "r_x3": 359.77896, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "xe", + "orig": "xe", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 356.72672, + "r_y0": 667.71179, + "r_x1": 356.72888, + "r_y1": 667.71179, + "r_x2": 356.72888, + "r_y2": 655.42273, + "r_x3": 356.72672, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 335.3306, + "r_y0": 667.71179, + "r_x1": 353.67493, + "r_y1": 667.71179, + "r_x2": 353.67493, + "r_y2": 655.42273, + "r_x3": 335.3306, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "dna", + "orig": "dna", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 332.27878, + "r_y0": 667.71179, + "r_x1": 332.28094, + "r_y1": 667.71179, + "r_x2": 332.28094, + "r_y2": 655.42273, + "r_x3": 332.27878, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 301.7153, + "r_y0": 667.71179, + "r_x1": 329.22699, + "r_y1": 667.71179, + "r_x2": 329.22699, + "r_y2": 655.42273, + "r_x3": 301.7153, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "a neh", + "orig": "a neh", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 298.66348, + "r_y0": 667.71179, + "r_x1": 298.66565, + "r_y1": 667.71179, + "r_x2": 298.66565, + "r_y2": 655.42273, + "r_x3": 298.66348, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 274.82526, + "r_y0": 667.71179, + "r_x1": 295.61169, + "r_y1": 667.71179, + "r_x2": 295.61169, + "r_y2": 655.42273, + "r_x3": 274.82526, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "elba", + "orig": "elba", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 271.77344, + "r_y0": 667.71179, + "r_x1": 271.7756, + "r_y1": 667.71179, + "r_x2": 271.7756, + "r_y2": 655.42273, + "r_x3": 271.77344, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " i", + "orig": " i", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.83258, + "r_y0": 667.71179, + "r_x1": 269.3335, + "r_y1": 667.71179, + "r_x2": 269.3335, + "r_y2": 655.42273, + "r_x3": 241.83258, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "egam", + "orig": "egam", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 238.78076, + "r_y0": 667.71179, + "r_x1": 238.78296, + "r_y1": 667.71179, + "r_x2": 238.78296, + "r_y2": 655.42273, + "r_x3": 238.78076, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": ": ", + "orig": ": ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + }, + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + }, + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + }, + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + }, + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + }, + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + }, + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, "from_ocr": true } ], @@ -146,16 +946,16 @@ "layout": { "clusters": [ { - "id": 0, + "id": 8, "label": "text", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 194.03979, + "t": 690.10254, + "r": 410.73657, + "b": 719.149414, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.7134009003639221, "cells": [ { "index": 0, @@ -166,22 +966,37 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 194.03979, + "r_y0": 719.149414, + "r_x1": 410.73657, + "r_y1": 719.149414, + "r_x2": 410.73657, + "r_y2": 690.10254, + "r_x3": 194.03979, + "r_y3": 690.10254, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": " tset elbat a si sihT", + "orig": " tset elbat a si sihT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 238.78076, + "t": 655.42273, + "r": 540.0, + "b": 667.71179, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8374139070510864, + "cells": [ { "index": 1, "rgba": { @@ -191,37 +1006,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 521.0545, + "r_y0": 667.71179, + "r_x1": 540.0, + "r_y1": 667.71179, + "r_x2": 540.0, + "r_y2": 655.42273, + "r_x3": 521.0545, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "ehT", + "orig": "ehT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -231,29 +1031,2429 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 518.00269, + "r_y0": 667.71179, + "r_x1": 518.00488, + "r_y1": 667.71179, + "r_x2": 518.00488, + "r_y2": 655.42273, + "r_x3": 518.00269, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": " t", + "orig": " t", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 503.33759000000003, + "r_y0": 667.71179, + "r_x1": 514.95093, + "r_y1": 667.71179, + "r_x2": 514.95093, + "r_y2": 655.42273, + "r_x3": 503.33759000000003, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "se", + "orig": "se", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 500.28534, + "r_y0": 667.71179, + "r_x1": 500.28751, + "r_y1": 667.71179, + "r_x2": 500.28751, + "r_y2": 655.42273, + "r_x3": 500.28534, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.36172000000005, + "r_y0": 667.71179, + "r_x1": 497.23352, + "r_y1": 667.71179, + "r_x2": 497.23352, + "r_y2": 655.42273, + "r_x3": 459.36172000000005, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "w strats", + "orig": "w strats", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 456.92352, + "r_y0": 667.71179, + "r_x1": 456.92526, + "r_y1": 667.71179, + "r_x2": 456.92526, + "r_y2": 655.42273, + "r_x3": 456.92352, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "it", + "orig": "it", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 377.49374, + "r_y0": 667.71179, + "r_x1": 453.87128, + "r_y1": 667.71179, + "r_x2": 453.87128, + "r_y2": 655.42273, + "r_x3": 377.49374, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "modnar emos h", + "orig": "modnar emos h", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 374.44409, + "r_y0": 667.71179, + "r_x1": 374.44629, + "r_y1": 667.71179, + "r_x2": 374.44629, + "r_y2": 655.42273, + "r_x3": 374.44409, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 359.77896, + "r_y0": 667.71179, + "r_x1": 371.3923, + "r_y1": 667.71179, + "r_x2": 371.3923, + "r_y2": 655.42273, + "r_x3": 359.77896, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "xe", + "orig": "xe", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 356.72672, + "r_y0": 667.71179, + "r_x1": 356.72888, + "r_y1": 667.71179, + "r_x2": 356.72888, + "r_y2": 655.42273, + "r_x3": 356.72672, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 335.3306, + "r_y0": 667.71179, + "r_x1": 353.67493, + "r_y1": 667.71179, + "r_x2": 353.67493, + "r_y2": 655.42273, + "r_x3": 335.3306, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "dna", + "orig": "dna", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 332.27878, + "r_y0": 667.71179, + "r_x1": 332.28094, + "r_y1": 667.71179, + "r_x2": 332.28094, + "r_y2": 655.42273, + "r_x3": 332.27878, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 301.7153, + "r_y0": 667.71179, + "r_x1": 329.22699, + "r_y1": 667.71179, + "r_x2": 329.22699, + "r_y2": 655.42273, + "r_x3": 301.7153, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "a neh", + "orig": "a neh", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 298.66348, + "r_y0": 667.71179, + "r_x1": 298.66565, + "r_y1": 667.71179, + "r_x2": 298.66565, + "r_y2": 655.42273, + "r_x3": 298.66348, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 274.82526, + "r_y0": 667.71179, + "r_x1": 295.61169, + "r_y1": 667.71179, + "r_x2": 295.61169, + "r_y2": 655.42273, + "r_x3": 274.82526, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "elba", + "orig": "elba", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 271.77344, + "r_y0": 667.71179, + "r_x1": 271.7756, + "r_y1": 667.71179, + "r_x2": 271.7756, + "r_y2": 655.42273, + "r_x3": 271.77344, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " i", + "orig": " i", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.83258, + "r_y0": 667.71179, + "r_x1": 269.3335, + "r_y1": 667.71179, + "r_x2": 269.3335, + "r_y2": 655.42273, + "r_x3": 241.83258, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "egam", + "orig": "egam", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 238.78076, + "r_y0": 667.71179, + "r_x1": 238.78296, + "r_y1": 667.71179, + "r_x2": 238.78296, + "r_y2": 655.42273, + "r_x3": 238.78076, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": ": ", + "orig": ": ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "children": [] + }, + { + "id": 9, + "label": "table", + "bbox": { + "l": 112.69406127929688, + "t": 489.72344970703125, + "r": 470.0718078613281, + "b": 628.2994995117188, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.6408323049545288, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + }, + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + }, + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + }, + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + }, + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + }, + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + }, + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [ + { + "id": 7, + "label": "text", + "bbox": { + "l": 268.33333333333337, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7441245913505554, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 133.0, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7668525576591492, + "cells": [ + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 386.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7588309049606323, + "cells": [ + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 279.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7627862095832825, + "cells": [ + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 149.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7541249394416809, + "cells": [ + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 371.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7616423964500427, + "cells": [ + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 275.66666666666663, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7468306422233582, + "cells": [ + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "9": { + "label": "table", + "id": 9, + "page_no": 0, + "cluster": { + "id": 9, + "label": "table", + "bbox": { + "l": 112.69406127929688, + "t": 489.72344970703125, + "r": 470.0718078613281, + "b": 628.2994995117188, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.6408323049545288, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + }, + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + }, + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + }, + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + }, + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + }, + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + }, + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [ + { + "id": 7, + "label": "text", + "bbox": { + "l": 268.33333333333337, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7441245913505554, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 133.0, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7668525576591492, + "cells": [ + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 386.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7588309049606323, + "cells": [ + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 279.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7627862095832825, + "cells": [ + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 149.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7541249394416809, + "cells": [ + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 371.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7616423964500427, + "cells": [ + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 275.66666666666663, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7468306422233582, + "cells": [ + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 308.0, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some column", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 201.66666666666669, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Some other column", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some row", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 299.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 561.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cell", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 190.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have content", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 422.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some other row", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 303.0, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other don't", + "column_header": true, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -263,19 +3463,19 @@ "elements": [ { "label": "text", - "id": 0, + "id": 8, "page_no": 0, "cluster": { - "id": 0, + "id": 8, "label": "text", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 194.03979, + "t": 690.10254, + "r": 410.73657, + "b": 719.149414, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.7134009003639221, "cells": [ { "index": 0, @@ -286,22 +3486,43 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 194.03979, + "r_y0": 719.149414, + "r_x1": 410.73657, + "r_y1": 719.149414, + "r_x2": 410.73657, + "r_y2": 690.10254, + "r_x3": 194.03979, + "r_y3": 690.10254, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": " tset elbat a si sihT", + "orig": " tset elbat a si sihT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "tset elbat a si sihT" + }, + { + "label": "text", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "text", + "bbox": { + "l": 238.78076, + "t": 655.42273, + "r": 540.0, + "b": 667.71179, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8374139070510864, + "cells": [ { "index": 1, "rgba": { @@ -311,43 +3532,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 521.0545, + "r_y0": 667.71179, + "r_x1": 540.0, + "r_y1": 667.71179, + "r_x2": 540.0, + "r_y2": 655.42273, + "r_x3": 521.0545, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "ehT", + "orig": "ehT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -357,44 +3557,1522 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 518.00269, + "r_y0": 667.71179, + "r_x1": 518.00488, + "r_y1": 667.71179, + "r_x2": 518.00488, + "r_y2": 655.42273, + "r_x3": 518.00269, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": " t", + "orig": " t", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 503.33759000000003, + "r_y0": 667.71179, + "r_x1": 514.95093, + "r_y1": 667.71179, + "r_x2": 514.95093, + "r_y2": 655.42273, + "r_x3": 503.33759000000003, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "se", + "orig": "se", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 500.28534, + "r_y0": 667.71179, + "r_x1": 500.28751, + "r_y1": 667.71179, + "r_x2": 500.28751, + "r_y2": 655.42273, + "r_x3": 500.28534, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.36172000000005, + "r_y0": 667.71179, + "r_x1": 497.23352, + "r_y1": 667.71179, + "r_x2": 497.23352, + "r_y2": 655.42273, + "r_x3": 459.36172000000005, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "w strats", + "orig": "w strats", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 456.92352, + "r_y0": 667.71179, + "r_x1": 456.92526, + "r_y1": 667.71179, + "r_x2": 456.92526, + "r_y2": 655.42273, + "r_x3": 456.92352, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "it", + "orig": "it", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 377.49374, + "r_y0": 667.71179, + "r_x1": 453.87128, + "r_y1": 667.71179, + "r_x2": 453.87128, + "r_y2": 655.42273, + "r_x3": 377.49374, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "modnar emos h", + "orig": "modnar emos h", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 374.44409, + "r_y0": 667.71179, + "r_x1": 374.44629, + "r_y1": 667.71179, + "r_x2": 374.44629, + "r_y2": 655.42273, + "r_x3": 374.44409, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 359.77896, + "r_y0": 667.71179, + "r_x1": 371.3923, + "r_y1": 667.71179, + "r_x2": 371.3923, + "r_y2": 655.42273, + "r_x3": 359.77896, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "xe", + "orig": "xe", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 356.72672, + "r_y0": 667.71179, + "r_x1": 356.72888, + "r_y1": 667.71179, + "r_x2": 356.72888, + "r_y2": 655.42273, + "r_x3": 356.72672, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 335.3306, + "r_y0": 667.71179, + "r_x1": 353.67493, + "r_y1": 667.71179, + "r_x2": 353.67493, + "r_y2": 655.42273, + "r_x3": 335.3306, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "dna", + "orig": "dna", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 332.27878, + "r_y0": 667.71179, + "r_x1": 332.28094, + "r_y1": 667.71179, + "r_x2": 332.28094, + "r_y2": 655.42273, + "r_x3": 332.27878, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 301.7153, + "r_y0": 667.71179, + "r_x1": 329.22699, + "r_y1": 667.71179, + "r_x2": 329.22699, + "r_y2": 655.42273, + "r_x3": 301.7153, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "a neh", + "orig": "a neh", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 298.66348, + "r_y0": 667.71179, + "r_x1": 298.66565, + "r_y1": 667.71179, + "r_x2": 298.66565, + "r_y2": 655.42273, + "r_x3": 298.66348, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 274.82526, + "r_y0": 667.71179, + "r_x1": 295.61169, + "r_y1": 667.71179, + "r_x2": 295.61169, + "r_y2": 655.42273, + "r_x3": 274.82526, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "elba", + "orig": "elba", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 271.77344, + "r_y0": 667.71179, + "r_x1": 271.7756, + "r_y1": 667.71179, + "r_x2": 271.7756, + "r_y2": 655.42273, + "r_x3": 271.77344, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " i", + "orig": " i", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.83258, + "r_y0": 667.71179, + "r_x1": 269.3335, + "r_y1": 667.71179, + "r_x2": 269.3335, + "r_y2": 655.42273, + "r_x3": 241.83258, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "egam", + "orig": "egam", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 238.78076, + "r_y0": 667.71179, + "r_x1": 238.78296, + "r_y1": 667.71179, + "r_x2": 238.78296, + "r_y2": 655.42273, + "r_x3": 238.78076, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": ": ", + "orig": ": ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "children": [] }, - "text": "package" + "text": "ehT t se t w strats it modnar emos h t xe t dna t a neh t elba i egam :" + }, + { + "label": "table", + "id": 9, + "page_no": 0, + "cluster": { + "id": 9, + "label": "table", + "bbox": { + "l": 112.69406127929688, + "t": 489.72344970703125, + "r": 470.0718078613281, + "b": 628.2994995117188, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.6408323049545288, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + }, + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + }, + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + }, + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + }, + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + }, + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + }, + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [ + { + "id": 7, + "label": "text", + "bbox": { + "l": 268.33333333333337, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7441245913505554, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 133.0, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7668525576591492, + "cells": [ + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 386.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7588309049606323, + "cells": [ + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 279.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7627862095832825, + "cells": [ + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 149.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7541249394416809, + "cells": [ + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 371.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7616423964500427, + "cells": [ + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 275.66666666666663, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7468306422233582, + "cells": [ + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 308.0, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some column", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 201.66666666666669, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Some other column", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some row", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 299.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 561.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cell", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 190.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have content", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 422.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some other row", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 303.0, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other don't", + "column_header": true, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { "label": "text", - "id": 0, + "id": 8, "page_no": 0, "cluster": { - "id": 0, + "id": 8, "label": "text", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 194.03979, + "t": 690.10254, + "r": 410.73657, + "b": 719.149414, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.7134009003639221, "cells": [ { "index": 0, @@ -405,22 +5083,43 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 194.03979, + "r_y0": 719.149414, + "r_x1": 410.73657, + "r_y1": 719.149414, + "r_x2": 410.73657, + "r_y2": 690.10254, + "r_x3": 194.03979, + "r_y3": 690.10254, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": " tset elbat a si sihT", + "orig": " tset elbat a si sihT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "tset elbat a si sihT" + }, + { + "label": "text", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "text", + "bbox": { + "l": 238.78076, + "t": 655.42273, + "r": 540.0, + "b": 667.71179, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8374139070510864, + "cells": [ { "index": 1, "rgba": { @@ -430,43 +5129,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 521.0545, + "r_y0": 667.71179, + "r_x1": 540.0, + "r_y1": 667.71179, + "r_x2": 540.0, + "r_y2": 655.42273, + "r_x3": 521.0545, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "ehT", + "orig": "ehT", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -476,26 +5154,1504 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 518.00269, + "r_y0": 667.71179, + "r_x1": 518.00488, + "r_y1": 667.71179, + "r_x2": 518.00488, + "r_y2": 655.42273, + "r_x3": 518.00269, + "r_y3": 655.42273, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": " t", + "orig": " t", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 503.33759000000003, + "r_y0": 667.71179, + "r_x1": 514.95093, + "r_y1": 667.71179, + "r_x2": 514.95093, + "r_y2": 655.42273, + "r_x3": 503.33759000000003, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "se", + "orig": "se", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 500.28534, + "r_y0": 667.71179, + "r_x1": 500.28751, + "r_y1": 667.71179, + "r_x2": 500.28751, + "r_y2": 655.42273, + "r_x3": 500.28534, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.36172000000005, + "r_y0": 667.71179, + "r_x1": 497.23352, + "r_y1": 667.71179, + "r_x2": 497.23352, + "r_y2": 655.42273, + "r_x3": 459.36172000000005, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "w strats", + "orig": "w strats", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 456.92352, + "r_y0": 667.71179, + "r_x1": 456.92526, + "r_y1": 667.71179, + "r_x2": 456.92526, + "r_y2": 655.42273, + "r_x3": 456.92352, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "it", + "orig": "it", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 377.49374, + "r_y0": 667.71179, + "r_x1": 453.87128, + "r_y1": 667.71179, + "r_x2": 453.87128, + "r_y2": 655.42273, + "r_x3": 377.49374, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "modnar emos h", + "orig": "modnar emos h", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 374.44409, + "r_y0": 667.71179, + "r_x1": 374.44629, + "r_y1": 667.71179, + "r_x2": 374.44629, + "r_y2": 655.42273, + "r_x3": 374.44409, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 359.77896, + "r_y0": 667.71179, + "r_x1": 371.3923, + "r_y1": 667.71179, + "r_x2": 371.3923, + "r_y2": 655.42273, + "r_x3": 359.77896, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "xe", + "orig": "xe", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 356.72672, + "r_y0": 667.71179, + "r_x1": 356.72888, + "r_y1": 667.71179, + "r_x2": 356.72888, + "r_y2": 655.42273, + "r_x3": 356.72672, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "t ", + "orig": "t ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 335.3306, + "r_y0": 667.71179, + "r_x1": 353.67493, + "r_y1": 667.71179, + "r_x2": 353.67493, + "r_y2": 655.42273, + "r_x3": 335.3306, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "dna", + "orig": "dna", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 332.27878, + "r_y0": 667.71179, + "r_x1": 332.28094, + "r_y1": 667.71179, + "r_x2": 332.28094, + "r_y2": 655.42273, + "r_x3": 332.27878, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 301.7153, + "r_y0": 667.71179, + "r_x1": 329.22699, + "r_y1": 667.71179, + "r_x2": 329.22699, + "r_y2": 655.42273, + "r_x3": 301.7153, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "a neh", + "orig": "a neh", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 298.66348, + "r_y0": 667.71179, + "r_x1": 298.66565, + "r_y1": 667.71179, + "r_x2": 298.66565, + "r_y2": 655.42273, + "r_x3": 298.66348, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " t", + "orig": " t", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 274.82526, + "r_y0": 667.71179, + "r_x1": 295.61169, + "r_y1": 667.71179, + "r_x2": 295.61169, + "r_y2": 655.42273, + "r_x3": 274.82526, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "elba", + "orig": "elba", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 271.77344, + "r_y0": 667.71179, + "r_x1": 271.7756, + "r_y1": 667.71179, + "r_x2": 271.7756, + "r_y2": 655.42273, + "r_x3": 271.77344, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": " i", + "orig": " i", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.83258, + "r_y0": 667.71179, + "r_x1": 269.3335, + "r_y1": 667.71179, + "r_x2": 269.3335, + "r_y2": 655.42273, + "r_x3": 241.83258, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": "egam", + "orig": "egam", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 238.78076, + "r_y0": 667.71179, + "r_x1": 238.78296, + "r_y1": 667.71179, + "r_x2": 238.78296, + "r_y2": 655.42273, + "r_x3": 238.78076, + "r_y3": 655.42273, + "coord_origin": "TOPLEFT" + }, + "text": ": ", + "orig": ": ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "children": [] }, - "text": "package" + "text": "ehT t se t w strats it modnar emos h t xe t dna t a neh t elba i egam :" + }, + { + "label": "table", + "id": 9, + "page_no": 0, + "cluster": { + "id": 9, + "label": "table", + "bbox": { + "l": 112.69406127929688, + "t": 489.72344970703125, + "r": 470.0718078613281, + "b": 628.2994995117188, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.6408323049545288, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + }, + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + }, + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + }, + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + }, + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + }, + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + }, + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [ + { + "id": 7, + "label": "text", + "bbox": { + "l": 268.33333333333337, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7441245913505554, + "cells": [ + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 337.0, + "r_y0": 601.0, + "r_x1": 308.0, + "r_y1": 601.0, + "r_x2": 308.0, + "r_y2": 609.6666666666666, + "r_x3": 337.0, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95841644, + "from_ocr": true + }, + { + "index": 20, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 304.0, + "r_y0": 601.0, + "r_x1": 268.33333333333337, + "r_y1": 601.0, + "r_x2": 268.33333333333337, + "r_y2": 609.3333333333334, + "r_x3": 304.0, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95624527, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 133.0, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7668525576591492, + "cells": [ + { + "index": 21, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 230.66666666666666, + "r_y0": 601.0, + "r_x1": 201.66666666666669, + "r_y1": 601.0, + "r_x2": 201.66666666666669, + "r_y2": 609.6666666666666, + "r_x3": 230.66666666666666, + "r_y3": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96296555, + "from_ocr": true + }, + { + "index": 22, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 197.66666666666669, + "r_y0": 601.0, + "r_x1": 172.0, + "r_y1": 601.0, + "r_x2": 172.0, + "r_y2": 609.3333333333334, + "r_x3": 197.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96467484, + "from_ocr": true + }, + { + "index": 23, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 168.66666666666669, + "r_y0": 601.0, + "r_x1": 133.0, + "r_y1": 601.0, + "r_x2": 133.0, + "r_y2": 609.3333333333334, + "r_x3": 168.66666666666669, + "r_y3": 609.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95497986, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 386.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7588309049606323, + "cells": [ + { + "index": 24, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 437.3333333333333, + "r_y0": 554.6666666666666, + "r_x1": 408.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 408.3333333333333, + "r_y2": 563.3333333333334, + "r_x3": 437.3333333333333, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95944489, + "from_ocr": true + }, + { + "index": 25, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 404.0, + "r_y0": 554.6666666666666, + "r_x1": 386.3333333333333, + "r_y1": 554.6666666666666, + "r_x2": 386.3333333333333, + "r_y2": 561.0, + "r_x3": 404.0, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9680950199999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 279.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7627862095832825, + "cells": [ + { + "index": 26, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 299.0, + "r_y1": 554.6666666666666, + "r_x2": 299.0, + "r_y2": 561.0, + "r_x3": 326.33333333333337, + "r_y3": 561.0, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9569136, + "from_ocr": true + }, + { + "index": 27, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 295.33333333333337, + "r_y0": 554.6666666666666, + "r_x1": 279.0, + "r_y1": 554.6666666666666, + "r_x2": 279.0, + "r_y2": 563.3333333333334, + "r_x3": 295.33333333333337, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.9622145799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 149.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7541249394416809, + "cells": [ + { + "index": 28, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 213.66666666666666, + "r_y0": 554.6666666666666, + "r_x1": 190.0, + "r_y1": 554.6666666666666, + "r_x2": 190.0, + "r_y2": 563.3333333333334, + "r_x3": 213.66666666666666, + "r_y3": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.96403, + "from_ocr": true + }, + { + "index": 29, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 186.0, + "r_y0": 554.6666666666666, + "r_x1": 149.0, + "r_y1": 554.6666666666666, + "r_x2": 149.0, + "r_y2": 563.0, + "r_x3": 186.0, + "r_y3": 563.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96691612, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 371.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7616423964500427, + "cells": [ + { + "index": 30, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 451.6666666666667, + "r_y0": 508.3333333333333, + "r_x1": 422.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 422.6666666666667, + "r_y2": 517.0, + "r_x3": 451.6666666666667, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9684503200000001, + "from_ocr": true + }, + { + "index": 31, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 419.0, + "r_y0": 508.3333333333333, + "r_x1": 393.0, + "r_y1": 508.3333333333333, + "r_x2": 393.0, + "r_y2": 516.6666666666666, + "r_x3": 419.0, + "r_y3": 516.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96073517, + "from_ocr": true + }, + { + "index": 32, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 389.3333333333333, + "r_y0": 508.3333333333333, + "r_x1": 371.6666666666667, + "r_y1": 508.3333333333333, + "r_x2": 371.6666666666667, + "r_y2": 514.6666666666666, + "r_x3": 389.3333333333333, + "r_y3": 514.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9615368700000001, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 275.66666666666663, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7468306422233582, + "cells": [ + { + "index": 33, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 329.0, + "r_y0": 508.3333333333333, + "r_x1": 303.0, + "r_y1": 508.3333333333333, + "r_x2": 303.0, + "r_y2": 517.0, + "r_x3": 329.0, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9626261100000001, + "from_ocr": true + }, + { + "index": 34, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 299.66666666666663, + "r_y0": 508.3333333333333, + "r_x1": 275.66666666666663, + "r_y1": 508.3333333333333, + "r_x2": 275.66666666666663, + "r_y2": 517.0, + "r_x3": 299.66666666666663, + "r_y3": 517.0, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96669136, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 308.0, + "t": 601.0, + "r": 337.0, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some column", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 201.66666666666669, + "t": 601.0, + "r": 230.66666666666666, + "b": 609.6666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Some other column", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 554.6666666666666, + "r": 437.3333333333333, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some row", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 299.0, + "t": 554.6666666666666, + "r": 326.33333333333337, + "b": 561.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cell", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 190.0, + "t": 554.6666666666666, + "r": 213.66666666666666, + "b": 563.3333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have content", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 422.6666666666667, + "t": 508.3333333333333, + "r": 451.6666666666667, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Some other row", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 303.0, + "t": 508.3333333333333, + "r": 329.0, + "b": 517.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other don't", + "column_header": true, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json index fed4d9ec..0594cfe0 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json @@ -27,53 +27,136 @@ "file-info": { "filename": "ocr_test_rotated_270.pdf", "filename-prov": null, - "document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14", + "document-hash": "6fefac7b5b41551979e0acb695ca99549a91784619c82c6095d8130179431437", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304", + "hash": "68730d83582a6ac50587fdba1a8ce6b263d682a0daf984522d4dbe9f9e3d4fbe", "model": "default", "page": 1 } ] }, "main-text": [ + { + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + }, { "prov": [ { "bbox": [ - 690.2441821046808, - 442.39487414368364, - 709.8255852011977, - 523.076601235155 + 640.87671, + 235.72681, + 653.16504, + 533.28552 ], "page": 1, "span": [ 0, - 7 + 49 ], "__ref_s3_data": null } ], - "text": "package", + "text": "heteststartswithsomerandomtextandthenatableimage:", "type": "paragraph", "payload": null, "name": "Text", "font": null + }, + { + "name": "Picture", + "type": "figure", + "$ref": "#/figures/0" + }, + { + "prov": [ + { + "bbox": [ + 690.10272, + 194.03976, + 719.1490499999999, + 410.73663 + ], + "page": 1, + "span": [ + 0, + 20 + ], + "__ref_s3_data": null + } + ], + "text": "This is a table test", + "type": "subtitle-level-1", + "payload": null, + "name": "Section-header", + "font": null + } + ], + "figures": [ + { + "prov": [ + { + "bbox": [ + 668.9778442382812, + 532.5339431762695, + 683.4164962768555, + 541.4290084838867 + ], + "page": 1, + "span": [ + 0, + 0 + ], + "__ref_s3_data": null + } + ], + "text": "", + "type": "figure", + "payload": null, + "bounding-box": null + } + ], + "tables": [ + { + "prov": [ + { + "bbox": [ + 460.5577697753906, + 112.21743774414062, + 599.0364074707031, + 469.385986328125 + ], + "page": 1, + "span": [ + 0, + 0 + ], + "__ref_s3_data": null + } + ], + "text": "", + "type": "table", + "payload": null, + "#-cols": 0, + "#-rows": 0, + "data": [], + "model": null, + "bounding-box": null } ], - "figures": [], - "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 595.201171875, + "height": 612.0, "page": 1, - "width": 841.9216918945312 + "width": 792.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json index 4caa899d..87a75a66 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.9216918945312, - "height": 595.201171875 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 612.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 612.0, + "r_y2": 792.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 792.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 690.10272, + "r_y0": 417.96024, + "r_x1": 719.1490499999999, + "r_y1": 417.96024, + "r_x2": 719.1490499999999, + "r_y2": 201.26337, + "r_x3": 690.10272, + "r_y3": 201.26337, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": " T", + "orig": " T", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,20 +119,445 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 640.87671, + "r_y0": 376.27319, + "r_x1": 653.16504, + "r_y1": 376.27319, + "r_x2": 653.16504, + "r_y2": 78.71447999999998, + "r_x3": 640.87671, + "r_y3": 78.71447999999998, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "heteststartswithsomerandomtextandthenatableimage: ", + "orig": "heteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.21301, + "r_y0": 309.05624, + "r_x1": 420.50208, + "r_y1": 309.05624, + "r_x2": 420.50208, + "r_y2": 306.0, + "r_x3": 408.21301, + "r_y3": 306.0, + "coord_origin": "TOPLEFT" + }, + "text": " ", + "orig": " ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, "from_ocr": true } ], @@ -146,16 +571,16 @@ "layout": { "clusters": [ { - "id": 0, - "label": "page_header", + "id": 8, + "label": "section_header", "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, + "l": 690.10272, + "t": 201.26337, + "r": 719.1490499999999, + "b": 417.96024, "coord_origin": "TOPLEFT" }, - "confidence": 0.6915205121040344, + "confidence": 0.7426818609237671, "cells": [ { "index": 0, @@ -166,22 +591,37 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 690.10272, + "r_y0": 417.96024, + "r_x1": 719.1490499999999, + "r_y1": 417.96024, + "r_x2": 719.1490499999999, + "r_y2": 201.26337, + "r_x3": 690.10272, + "r_y3": 201.26337, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "picture", + "bbox": { + "l": 668.9778442382812, + "t": 70.57099151611328, + "r": 683.4164962768555, + "b": 79.46605682373047, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5229994654655457, + "cells": [ { "index": 1, "rgba": { @@ -191,36 +631,77 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": " T", + "orig": " T", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 20, + "label": "text", + "bbox": { + "l": 669.96899, + "t": 71.99987999999996, + "r": 682.25806, + "b": 78.71936000000005, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, + "coord_origin": "TOPLEFT" + }, + "text": " T", + "orig": " T", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, { - "id": 8, + "id": 0, "label": "text", "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, + "l": 640.87671, + "t": 78.71447999999998, + "r": 653.16504, + "b": 376.27319, "coord_origin": "TOPLEFT" }, - "confidence": 1.0, + "confidence": 0.8302523493766785, "cells": [ { "index": 2, @@ -231,29 +712,1895 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 640.87671, + "r_y0": 376.27319, + "r_x1": 653.16504, + "r_y1": 376.27319, + "r_x2": 653.16504, + "r_y2": 78.71447999999998, + "r_x3": 640.87671, + "r_y3": 78.71447999999998, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "heteststartswithsomerandomtextandthenatableimage: ", + "orig": "heteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], "children": [] + }, + { + "id": 11, + "label": "table", + "bbox": { + "l": 460.5577697753906, + "t": 142.614013671875, + "r": 599.0364074707031, + "b": 499.7825622558594, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5623787045478821, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [ + { + "id": 3, + "label": "text", + "bbox": { + "l": 572.0, + "t": 275.0, + "r": 580.6666666666666, + "b": 343.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7740143537521362, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 572.0, + "t": 381.3333333333333, + "r": 580.6666666666666, + "b": 479.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7769111394882202, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 175.0, + "r": 534.3333333333334, + "b": 225.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7583935856819153, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 285.66666666666663, + "r": 534.3333333333334, + "b": 333.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7750864028930664, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 398.3333333333333, + "r": 534.3333333333334, + "b": 463.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7514549493789673, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 160.33333333333334, + "r": 488.0, + "b": 240.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.769959032535553, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 283.0, + "r": 488.0, + "b": 336.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7653545141220093, + "cells": [ + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "11": { + "label": "table", + "id": 11, + "page_no": 0, + "cluster": { + "id": 11, + "label": "table", + "bbox": { + "l": 460.5577697753906, + "t": 142.614013671875, + "r": 599.0364074707031, + "b": 499.7825622558594, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5623787045478821, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [ + { + "id": 3, + "label": "text", + "bbox": { + "l": 572.0, + "t": 275.0, + "r": 580.6666666666666, + "b": 343.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7740143537521362, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 572.0, + "t": 381.3333333333333, + "r": 580.6666666666666, + "b": 479.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7769111394882202, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 175.0, + "r": 534.3333333333334, + "b": 225.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7583935856819153, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 285.66666666666663, + "r": 534.3333333333334, + "b": 333.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7750864028930664, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 398.3333333333333, + "r": 534.3333333333334, + "b": 463.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7514549493789673, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 160.33333333333334, + "r": 488.0, + "b": 240.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.769959032535553, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 283.0, + "r": 488.0, + "b": 336.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7653545141220093, + "cells": [ + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2609,20 @@ "assembled": { "elements": [ { - "label": "page_header", - "id": 0, + "label": "section_header", + "id": 8, "page_no": 0, "cluster": { - "id": 0, - "label": "page_header", + "id": 8, + "label": "section_header", "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, + "l": 690.10272, + "t": 201.26337, + "r": 719.1490499999999, + "b": 417.96024, "coord_origin": "TOPLEFT" }, - "confidence": 0.6915205121040344, + "confidence": 0.7426818609237671, "cells": [ { "index": 0, @@ -286,22 +2633,43 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 690.10272, + "r_y0": 417.96024, + "r_x1": 719.1490499999999, + "r_y1": 417.96024, + "r_x2": 719.1490499999999, + "r_y2": 201.26337, + "r_x3": 690.10272, + "r_y3": 201.26337, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "picture", + "id": 13, + "page_no": 0, + "cluster": { + "id": 13, + "label": "picture", + "bbox": { + "l": 668.9778442382812, + "t": 70.57099151611328, + "r": 683.4164962768555, + "b": 79.46605682373047, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5229994654655457, + "cells": [ { "index": 1, "rgba": { @@ -311,42 +2679,87 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": " T", + "orig": " T", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 20, + "label": "text", + "bbox": { + "l": 669.96899, + "t": 71.99987999999996, + "r": 682.25806, + "b": 78.71936000000005, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, + "coord_origin": "TOPLEFT" + }, + "text": " T", + "orig": " T", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": "", + "annotations": [], + "provenance": null, + "predicted_class": null, + "confidence": null }, { "label": "text", - "id": 8, + "id": 0, "page_no": 0, "cluster": { - "id": 8, + "id": 0, "label": "text", "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, + "l": 640.87671, + "t": 78.71447999999998, + "r": 653.16504, + "b": 376.27319, "coord_origin": "TOPLEFT" }, - "confidence": 1.0, + "confidence": 0.8302523493766785, "cells": [ { "index": 2, @@ -357,92 +2770,988 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 640.87671, + "r_y0": 376.27319, + "r_x1": 653.16504, + "r_y1": 376.27319, + "r_x2": 653.16504, + "r_y2": 78.71447999999998, + "r_x3": 640.87671, + "r_y3": 78.71447999999998, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "heteststartswithsomerandomtextandthenatableimage: ", + "orig": "heteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], "children": [] }, - "text": "package" + "text": "heteststartswithsomerandomtextandthenatableimage:" + }, + { + "label": "table", + "id": 11, + "page_no": 0, + "cluster": { + "id": 11, + "label": "table", + "bbox": { + "l": 460.5577697753906, + "t": 142.614013671875, + "r": 599.0364074707031, + "b": 499.7825622558594, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5623787045478821, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [ + { + "id": 3, + "label": "text", + "bbox": { + "l": 572.0, + "t": 275.0, + "r": 580.6666666666666, + "b": 343.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7740143537521362, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 572.0, + "t": 381.3333333333333, + "r": 580.6666666666666, + "b": 479.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7769111394882202, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 175.0, + "r": 534.3333333333334, + "b": 225.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7583935856819153, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 285.66666666666663, + "r": 534.3333333333334, + "b": 333.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7750864028930664, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 398.3333333333333, + "r": 534.3333333333334, + "b": 463.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7514549493789673, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 160.33333333333334, + "r": 488.0, + "b": 240.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.769959032535553, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 283.0, + "r": 488.0, + "b": 336.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7653545141220093, + "cells": [ + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } ], "body": [ { - "label": "text", + "label": "section_header", "id": 8, "page_no": 0, "cluster": { "id": 8, - "label": "text", + "label": "section_header", "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, + "l": 690.10272, + "t": 201.26337, + "r": 719.1490499999999, + "b": 417.96024, "coord_origin": "TOPLEFT" }, - "confidence": 1.0, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", - "id": 0, - "page_no": 0, - "cluster": { - "id": 0, - "label": "page_header", - "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.6915205121040344, + "confidence": 0.7426818609237671, "cells": [ { "index": 0, @@ -453,22 +3762,43 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 690.10272, + "r_y0": 417.96024, + "r_x1": 719.1490499999999, + "r_y1": 417.96024, + "r_x2": 719.1490499999999, + "r_y2": 201.26337, + "r_x3": 690.10272, + "r_y3": 201.26337, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "picture", + "id": 13, + "page_no": 0, + "cluster": { + "id": 13, + "label": "picture", + "bbox": { + "l": 668.9778442382812, + "t": 70.57099151611328, + "r": 683.4164962768555, + "b": 79.46605682373047, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5229994654655457, + "cells": [ { "index": 1, "rgba": { @@ -478,28 +3808,1064 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": " T", + "orig": " T", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + } + ], + "children": [ + { + "id": 20, + "label": "text", + "bbox": { + "l": 669.96899, + "t": 71.99987999999996, + "r": 682.25806, + "b": 78.71936000000005, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 669.96899, + "r_y0": 78.71936000000005, + "r_x1": 682.25806, + "r_y1": 78.71936000000005, + "r_x2": 682.25806, + "r_y2": 71.99987999999996, + "r_x3": 669.96899, + "r_y3": 71.99987999999996, + "coord_origin": "TOPLEFT" + }, + "text": " T", + "orig": " T", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] + }, + "text": "", + "annotations": [], + "provenance": null, + "predicted_class": null, + "confidence": null + }, + { + "label": "text", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "text", + "bbox": { + "l": 640.87671, + "t": 78.71447999999998, + "r": 653.16504, + "b": 376.27319, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8302523493766785, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 640.87671, + "r_y0": 376.27319, + "r_x1": 653.16504, + "r_y1": 376.27319, + "r_x2": 653.16504, + "r_y2": 78.71447999999998, + "r_x3": 640.87671, + "r_y3": 78.71447999999998, + "coord_origin": "TOPLEFT" + }, + "text": "heteststartswithsomerandomtextandthenatableimage: ", + "orig": "heteststartswithsomerandomtextandthenatableimage: ", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "children": [] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": "heteststartswithsomerandomtextandthenatableimage:" + }, + { + "label": "table", + "id": 11, + "page_no": 0, + "cluster": { + "id": 11, + "label": "table", + "bbox": { + "l": 460.5577697753906, + "t": 142.614013671875, + "r": 599.0364074707031, + "b": 499.7825622558594, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.5623787045478821, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + }, + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [ + { + "id": 3, + "label": "text", + "bbox": { + "l": 572.0, + "t": 275.0, + "r": 580.6666666666666, + "b": 343.66666666666663, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7740143537521362, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 275.0, + "r_x1": 572.0, + "r_y1": 304.0, + "r_x2": 580.6666666666666, + "r_y2": 304.0, + "r_x3": 580.6666666666666, + "r_y3": 275.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.95741158, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 308.0, + "r_x1": 572.0, + "r_y1": 343.66666666666663, + "r_x2": 580.3333333333334, + "r_y2": 343.66666666666663, + "r_x3": 580.3333333333334, + "r_y3": 308.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9541709899999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 572.0, + "t": 381.3333333333333, + "r": 580.6666666666666, + "b": 479.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7769111394882202, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 381.3333333333333, + "r_x1": 572.0, + "r_y1": 410.3333333333333, + "r_x2": 580.6666666666666, + "r_y2": 410.3333333333333, + "r_x3": 580.6666666666666, + "r_y3": 381.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96515053, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 414.3333333333333, + "r_x1": 572.0, + "r_y1": 440.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 440.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 414.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9623101, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 572.0, + "r_y0": 443.3333333333333, + "r_x1": 572.0, + "r_y1": 479.3333333333333, + "r_x2": 580.3333333333334, + "r_y2": 479.3333333333333, + "r_x3": 580.3333333333334, + "r_y3": 443.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.94704376, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 175.0, + "r": 534.3333333333334, + "b": 225.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7583935856819153, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 175.0, + "r_x1": 525.6666666666666, + "r_y1": 204.0, + "r_x2": 534.3333333333334, + "r_y2": 204.0, + "r_x3": 534.3333333333334, + "r_y3": 175.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96139633, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 208.0, + "r_x1": 525.6666666666666, + "r_y1": 225.66666666666669, + "r_x2": 532.0, + "r_y2": 225.66666666666669, + "r_x3": 532.0, + "r_y3": 208.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9561322, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 285.66666666666663, + "r": 534.3333333333334, + "b": 333.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7750864028930664, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 285.66666666666663, + "r_x1": 525.6666666666666, + "r_y1": 313.0, + "r_x2": 532.0, + "r_y2": 313.0, + "r_x3": 532.0, + "r_y3": 285.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.9615657, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 317.0, + "r_x1": 525.6666666666666, + "r_y1": 333.0, + "r_x2": 534.3333333333334, + "r_y2": 333.0, + "r_x3": 534.3333333333334, + "r_y3": 317.0, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95838455, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 525.6666666666666, + "t": 398.3333333333333, + "r": 534.3333333333334, + "b": 463.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7514549493789673, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 398.3333333333333, + "r_x1": 525.6666666666666, + "r_y1": 422.0, + "r_x2": 534.3333333333334, + "r_y2": 422.0, + "r_x3": 534.3333333333334, + "r_y3": 398.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9642998500000001, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 525.6666666666666, + "r_y0": 426.0, + "r_x1": 525.6666666666666, + "r_y1": 463.0, + "r_x2": 534.0, + "r_y2": 463.0, + "r_x3": 534.0, + "r_y3": 426.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96576363, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 160.33333333333334, + "r": 488.0, + "b": 240.33333333333331, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.769959032535553, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 160.33333333333334, + "r_x1": 479.3333333333333, + "r_y1": 189.33333333333331, + "r_x2": 488.0, + "r_y2": 189.33333333333331, + "r_x3": 488.0, + "r_y3": 160.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.96371613, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 193.33333333333331, + "r_x1": 479.3333333333333, + "r_y1": 219.0, + "r_x2": 488.0, + "r_y2": 219.0, + "r_x3": 488.0, + "r_y3": 193.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9634315499999999, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 222.66666666666669, + "r_x1": 479.3333333333333, + "r_y1": 240.33333333333331, + "r_x2": 485.6666666666667, + "r_y2": 240.33333333333331, + "r_x3": 485.6666666666667, + "r_y3": 222.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.9611644699999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 479.3333333333333, + "t": 283.0, + "r": 488.0, + "b": 336.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7653545141220093, + "cells": [ + { + "index": 18, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 283.0, + "r_x1": 479.3333333333333, + "r_y1": 309.0, + "r_x2": 487.6666666666667, + "r_y2": 309.0, + "r_x3": 487.6666666666667, + "r_y3": 283.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.95751617, + "from_ocr": true + }, + { + "index": 19, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 479.3333333333333, + "r_y0": 312.33333333333337, + "r_x1": 479.3333333333333, + "r_y1": 336.33333333333337, + "r_x2": 488.0, + "r_y2": 336.33333333333337, + "r_x3": 488.0, + "r_y3": 312.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.9581434600000001, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json index e6bcce8c..b9d55049 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.9216918945312, - "height": 595.201171875 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 612.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 612.0, + "r_y2": 792.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 792.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 792.0, + "r": 612.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 72.850723, + "r_y0": 410.7366, + "r_x1": 101.89737999999998, + "r_y1": 410.7366, + "r_x2": 101.89737999999998, + "r_y2": 194.03978999999998, + "r_x3": 72.850723, + "r_y3": 194.03978999999998, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 124.28839, + "r_y0": 540.000015, + "r_x1": 136.57715, + "r_y1": 540.000015, + "r_x2": 136.57715, + "r_y2": 235.72681, + "r_x3": 124.28839, + "r_y3": 235.72681, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Theteststartswithsomerandomtextandthenatableimage: ", + "orig": "Theteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,20 +119,395 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, "from_ocr": true } ], @@ -146,16 +521,16 @@ "layout": { "clusters": [ { - "id": 0, - "label": "page_header", + "id": 9, + "label": "section_header", "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, + "l": 72.850723, + "t": 194.03978999999998, + "r": 101.89737999999998, + "b": 410.7366, "coord_origin": "TOPLEFT" }, - "confidence": 0.6016772389411926, + "confidence": 0.6652874946594238, "cells": [ { "index": 0, @@ -166,22 +541,37 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 72.850723, + "r_y0": 410.7366, + "r_x1": 101.89737999999998, + "r_y1": 410.7366, + "r_x2": 101.89737999999998, + "r_y2": 194.03978999999998, + "r_x3": 72.850723, + "r_y3": 194.03978999999998, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 124.28839, + "t": 235.72681, + "r": 136.57715, + "b": 540.000015, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8457421064376831, + "cells": [ { "index": 1, "rgba": { @@ -191,36 +581,36 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 124.28839, + "r_y0": 540.000015, + "r_x1": 136.57715, + "r_y1": 540.000015, + "r_x2": 136.57715, + "r_y2": 235.72681, + "r_x3": 124.28839, + "r_y3": 235.72681, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Theteststartswithsomerandomtextandthenatableimage: ", + "orig": "Theteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], "children": [] }, { - "id": 1, - "label": "text", + "id": 8, + "label": "form", "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, + "l": 182.33333333333334, + "t": 133.0, + "r": 283.66666666666663, + "b": 451.6666666666667, "coord_origin": "TOPLEFT" }, - "confidence": 0.5234212875366211, + "confidence": 0.7344542741775513, "cells": [ { "index": 2, @@ -231,24 +621,905 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 268.33333333333337, + "r": 191.0, + "b": 337.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9089116454124451, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 133.0, + "r": 191.0, + "b": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9040389060974121, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 386.3333333333333, + "r": 237.33333333333331, + "b": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9155756235122681, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 279.0, + "r": 237.33333333333331, + "b": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9143174290657043, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 149.0, + "r": 237.33333333333331, + "b": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9003775715827942, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 275.0, + "t": 371.6666666666667, + "r": 283.66666666666663, + "b": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9147250652313232, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 275.0, + "t": 275.66666666666663, + "r": 283.66666666666663, + "b": 329.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9124712347984314, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, @@ -262,20 +1533,20 @@ "assembled": { "elements": [ { - "label": "page_header", - "id": 0, + "label": "section_header", + "id": 9, "page_no": 0, "cluster": { - "id": 0, - "label": "page_header", + "id": 9, + "label": "section_header", "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, + "l": 72.850723, + "t": 194.03978999999998, + "r": 101.89737999999998, + "b": 410.7366, "coord_origin": "TOPLEFT" }, - "confidence": 0.6016772389411926, + "confidence": 0.6652874946594238, "cells": [ { "index": 0, @@ -286,22 +1557,43 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 72.850723, + "r_y0": 410.7366, + "r_x1": 101.89737999999998, + "r_y1": 410.7366, + "r_x2": 101.89737999999998, + "r_y2": 194.03978999999998, + "r_x3": 72.850723, + "r_y3": 194.03978999999998, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "text", + "id": 7, + "page_no": 0, + "cluster": { + "id": 7, + "label": "text", + "bbox": { + "l": 124.28839, + "t": 235.72681, + "r": 136.57715, + "b": 540.000015, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8457421064376831, + "cells": [ { "index": 1, "rgba": { @@ -311,42 +1603,42 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 124.28839, + "r_y0": 540.000015, + "r_x1": 136.57715, + "r_y1": 540.000015, + "r_x2": 136.57715, + "r_y2": 235.72681, + "r_x3": 124.28839, + "r_y3": 235.72681, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Theteststartswithsomerandomtextandthenatableimage: ", + "orig": "Theteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], "children": [] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": "Theteststartswithsomerandomtextandthenatableimage:" }, { - "label": "text", - "id": 1, + "label": "form", + "id": 8, "page_no": 0, "cluster": { - "id": 1, - "label": "text", + "id": 8, + "label": "form", "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, + "l": 182.33333333333334, + "t": 133.0, + "r": 283.66666666666663, + "b": 451.6666666666667, "coord_origin": "TOPLEFT" }, - "confidence": 0.5234212875366211, + "confidence": 0.7344542741775513, "cells": [ { "index": 2, @@ -357,92 +1649,925 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Some", + "orig": "Some", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 268.33333333333337, + "r": 191.0, + "b": 337.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9089116454124451, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 133.0, + "r": 191.0, + "b": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9040389060974121, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 386.3333333333333, + "r": 237.33333333333331, + "b": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9155756235122681, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 279.0, + "r": 237.33333333333331, + "b": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9143174290657043, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 149.0, + "r": 237.33333333333331, + "b": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9003775715827942, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 275.0, + "t": 371.6666666666667, + "r": 283.66666666666663, + "b": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9147250652313232, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 275.0, + "t": 275.66666666666663, + "r": 283.66666666666663, + "b": 329.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9124712347984314, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null } ], "body": [ { - "label": "text", - "id": 1, + "label": "section_header", + "id": 9, "page_no": 0, "cluster": { - "id": 1, - "label": "text", + "id": 9, + "label": "section_header", "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, + "l": 72.850723, + "t": 194.03978999999998, + "r": 101.89737999999998, + "b": 410.7366, "coord_origin": "TOPLEFT" }, - "confidence": 0.5234212875366211, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", - "id": 0, - "page_no": 0, - "cluster": { - "id": 0, - "label": "page_header", - "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.6016772389411926, + "confidence": 0.6652874946594238, "cells": [ { "index": 0, @@ -453,22 +2578,43 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 72.850723, + "r_y0": 410.7366, + "r_x1": 101.89737999999998, + "r_y1": 410.7366, + "r_x2": 101.89737999999998, + "r_y2": 194.03978999999998, + "r_x3": 72.850723, + "r_y3": 194.03978999999998, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "This is a table test ", + "orig": "This is a table test ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - }, + "from_ocr": false + } + ], + "children": [] + }, + "text": "This is a table test" + }, + { + "label": "text", + "id": 7, + "page_no": 0, + "cluster": { + "id": 7, + "label": "text", + "bbox": { + "l": 124.28839, + "t": 235.72681, + "r": 136.57715, + "b": 540.000015, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.8457421064376831, + "cells": [ { "index": 1, "rgba": { @@ -478,28 +2624,956 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 124.28839, + "r_y0": 540.000015, + "r_x1": 136.57715, + "r_y1": 540.000015, + "r_x2": 136.57715, + "r_y2": 235.72681, + "r_x3": 124.28839, + "r_y3": 235.72681, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Theteststartswithsomerandomtextandthenatableimage: ", + "orig": "Theteststartswithsomerandomtextandthenatableimage: ", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false } ], "children": [] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": "Theteststartswithsomerandomtextandthenatableimage:" + }, + { + "label": "form", + "id": 8, + "page_no": 0, + "cluster": { + "id": 8, + "label": "form", + "bbox": { + "l": 182.33333333333334, + "t": 133.0, + "r": 283.66666666666663, + "b": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.7344542741775513, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + }, + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, + "from_ocr": true + } + ], + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 268.33333333333337, + "r": 191.0, + "b": 337.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9089116454124451, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 337.0, + "r_x1": 191.0, + "r_y1": 308.0, + "r_x2": 182.33333333333334, + "r_y2": 308.0, + "r_x3": 182.33333333333334, + "r_y3": 337.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 304.0, + "r_x1": 191.0, + "r_y1": 268.33333333333337, + "r_x2": 182.33333333333334, + "r_y2": 268.33333333333337, + "r_x3": 182.33333333333334, + "r_y3": 304.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.9576889799999999, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 182.33333333333334, + "t": 133.0, + "r": 191.0, + "b": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9040389060974121, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 230.66666666666666, + "r_x1": 191.0, + "r_y1": 201.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 201.66666666666669, + "r_x3": 182.33333333333334, + "r_y3": 230.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9617948900000001, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 197.66666666666669, + "r_x1": 191.0, + "r_y1": 172.0, + "r_x2": 182.33333333333334, + "r_y2": 172.0, + "r_x3": 182.33333333333334, + "r_y3": 197.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.96105423, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 191.0, + "r_y0": 168.66666666666669, + "r_x1": 191.0, + "r_y1": 133.0, + "r_x2": 182.33333333333334, + "r_y2": 133.0, + "r_x3": 182.33333333333334, + "r_y3": 168.66666666666669, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 0.95868614, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 0, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 386.3333333333333, + "r": 237.33333333333331, + "b": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9155756235122681, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 437.3333333333333, + "r_x1": 237.33333333333331, + "r_y1": 408.3333333333333, + "r_x2": 228.66666666666669, + "r_y2": 408.3333333333333, + "r_x3": 228.66666666666669, + "r_y3": 437.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9579908, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 404.0, + "r_x1": 237.33333333333331, + "r_y1": 386.3333333333333, + "r_x2": 231.0, + "r_y2": 386.3333333333333, + "r_x3": 231.0, + "r_y3": 404.0, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.96640068, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 279.0, + "r": 237.33333333333331, + "b": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9143174290657043, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 326.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 299.0, + "r_x2": 231.0, + "r_y2": 299.0, + "r_x3": 231.0, + "r_y3": 326.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "some", + "orig": "some", + "text_direction": "left_to_right", + "confidence": 0.96376541, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 295.33333333333337, + "r_x1": 237.33333333333331, + "r_y1": 279.0, + "r_x2": 228.66666666666669, + "r_y2": 279.0, + "r_x3": 228.66666666666669, + "r_y3": 295.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "cell", + "orig": "cell", + "text_direction": "left_to_right", + "confidence": 0.95824509, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 228.66666666666669, + "t": 149.0, + "r": 237.33333333333331, + "b": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9003775715827942, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 213.66666666666666, + "r_x1": 237.33333333333331, + "r_y1": 190.0, + "r_x2": 228.66666666666669, + "r_y2": 190.0, + "r_x3": 228.66666666666669, + "r_y3": 213.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 0.9643471499999999, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 237.33333333333331, + "r_y0": 186.0, + "r_x1": 237.33333333333331, + "r_y1": 149.0, + "r_x2": 229.0, + "r_y2": 149.0, + "r_x3": 229.0, + "r_y3": 186.0, + "coord_origin": "TOPLEFT" + }, + "text": "content", + "orig": "content", + "text_direction": "left_to_right", + "confidence": 0.96289528, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 1, + "label": "text", + "bbox": { + "l": 275.0, + "t": 371.6666666666667, + "r": 283.66666666666663, + "b": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9147250652313232, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 451.6666666666667, + "r_x1": 283.66666666666663, + "r_y1": 422.6666666666667, + "r_x2": 275.0, + "r_y2": 422.6666666666667, + "r_x3": 275.0, + "r_y3": 451.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Some", + "orig": "Some", + "text_direction": "left_to_right", + "confidence": 0.9611363199999999, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 419.0, + "r_x1": 283.66666666666663, + "r_y1": 393.0, + "r_x2": 275.0, + "r_y2": 393.0, + "r_x3": 275.0, + "r_y3": 419.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9588653600000001, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 389.3333333333333, + "r_x1": 283.66666666666663, + "r_y1": 371.6666666666667, + "r_x2": 277.33333333333337, + "r_y2": 371.6666666666667, + "r_x3": 277.33333333333337, + "r_y3": 389.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "row", + "orig": "row", + "text_direction": "left_to_right", + "confidence": 0.95681549, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 275.0, + "t": 275.66666666666663, + "r": 283.66666666666663, + "b": 329.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9124712347984314, + "cells": [ + { + "index": 16, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 329.0, + "r_x1": 283.66666666666663, + "r_y1": 303.0, + "r_x2": 275.0, + "r_y2": 303.0, + "r_x3": 275.0, + "r_y3": 329.0, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 0.9589106, + "from_ocr": true + }, + { + "index": 17, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 283.66666666666663, + "r_y0": 299.66666666666663, + "r_x1": 283.66666666666663, + "r_y1": 275.66666666666663, + "r_x2": 275.0, + "r_y2": 275.66666666666663, + "r_x3": 275.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "don't", + "orig": "don't", + "text_direction": "left_to_right", + "confidence": 0.96121948, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt index c210e4dd..89a0eb20 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt @@ -1,2 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package +Column 0Column 1Column 2this is row 0some cellshave contentandand row 1otherhaveand last row 2nothinginside \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json index 22a1c54d..e0be74fe 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json @@ -4,7 +4,7 @@ "name": "ocr_test", "origin": { "mimetype": "application/pdf", - "binary_hash": 14853448746796404529, + "binary_hash": 3906211175708501508, "filename": "ocr_test.pdf" }, "furniture": { @@ -18,7 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -26,44 +26,592 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 69.68, - "t": 764.92, - "r": 504.87, - "b": 689.01, + "l": 103.33, + "t": 519.86, + "r": 560.95, + "b": 234.07, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 94 + 0 ] } ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 0", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "this is row 0", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and row 1", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 4, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 0", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "this is row 0", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and row 1", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 595.2, - "height": 841.92 + "width": 842.0, + "height": 595.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json index 093688be..e3613adc 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.2, - "height": 841.92 + "width": 842.0, + "height": 595.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 842.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 842.0, + "r_y2": 595.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 595.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,21 +119,271 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "has_chars": false, @@ -147,15 +397,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 103.33, + "t": 75.14, + "r": 560.95, + "b": 360.93, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.968, "cells": [ { "index": 0, @@ -166,21 +416,21 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -191,21 +441,21 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -216,29 +466,1941 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 14, + "label": "text", + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 103.33, + "t": 75.14, + "r": 560.95, + "b": 360.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.968, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [ + { + "id": 14, + "label": "text", + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "fcel", + "nl", + "rhed", + "ecel", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 0", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "this is row 0", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and row 1", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -247,20 +2409,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 103.33, + "t": 75.14, + "r": 560.95, + "b": 360.93, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.968, "cells": [ { "index": 0, @@ -271,21 +2433,21 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -296,21 +2458,21 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -321,44 +2483,1088 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 14, + "label": "text", + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "fcel", + "nl", + "rhed", + "ecel", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 0", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "this is row 0", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and row 1", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 103.33, + "t": 75.14, + "r": 560.95, + "b": 360.93, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.968, "cells": [ { "index": 0, @@ -369,21 +3575,21 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -394,21 +3600,21 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -419,26 +3625,1070 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 14, + "label": "text", + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 245.02, + "r_y0": 120.29, + "r_x1": 307.59, + "r_y1": 120.29, + "r_x2": 307.59, + "r_y2": 106.57, + "r_x3": 245.02, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 358.65, + "r_y0": 120.29, + "r_x1": 421.22, + "r_y1": 120.29, + "r_x2": 421.22, + "r_y2": 106.57, + "r_x3": 358.65, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 472.27, + "r_y0": 120.29, + "r_x1": 534.84, + "r_y1": 120.29, + "r_x2": 534.84, + "r_y2": 106.57, + "r_x3": 472.27, + "r_y3": 106.57, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 123.52, + "r_y0": 187.79, + "r_x1": 200.67, + "r_y1": 187.79, + "r_x2": 200.67, + "r_y2": 174.07, + "r_x3": 123.52, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 241.65, + "r_y0": 187.79, + "r_x1": 310.71, + "r_y1": 187.79, + "r_x2": 310.71, + "r_y2": 174.07, + "r_x3": 241.65, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 347.4, + "r_y0": 187.79, + "r_x1": 431.1, + "r_y1": 187.79, + "r_x2": 431.1, + "r_y2": 174.07, + "r_x3": 347.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 491.4, + "r_y0": 187.79, + "r_x1": 515.79, + "r_y1": 187.79, + "r_x2": 515.79, + "r_y2": 174.07, + "r_x3": 491.4, + "r_y3": 174.07, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 130.27, + "r_y0": 256.41, + "r_x1": 194.46, + "r_y1": 256.41, + "r_x2": 194.46, + "r_y2": 242.7, + "r_x3": 130.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 373.27, + "r_y0": 256.41, + "r_x1": 406.59, + "r_y1": 256.41, + "r_x2": 406.59, + "r_y2": 242.7, + "r_x3": 373.27, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 486.9, + "r_y0": 256.41, + "r_x1": 518.61, + "r_y1": 256.41, + "r_x2": 518.61, + "r_y2": 242.7, + "r_x3": 486.9, + "r_y3": 242.7, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 116.77, + "r_y0": 329.54, + "r_x1": 207.76, + "r_y1": 329.54, + "r_x2": 207.76, + "r_y2": 315.82, + "r_x3": 116.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 251.77, + "r_y0": 329.54, + "r_x1": 299.73, + "r_y1": 329.54, + "r_x2": 299.73, + "r_y2": 315.82, + "r_x3": 251.77, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 484.65, + "r_y0": 329.54, + "r_x1": 522.85, + "r_y1": 329.54, + "r_x2": 522.85, + "r_y2": 315.82, + "r_x3": 484.65, + "r_y3": 315.82, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ecel", + "ched", + "ched", + "ched", + "nl", + "rhed", + "fcel", + "fcel", + "fcel", + "nl", + "rhed", + "ecel", + "fcel", + "fcel", + "nl", + "rhed", + "fcel", + "ecel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 245.02, + "t": 106.57, + "r": 307.59, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 0", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 358.65, + "t": 106.57, + "r": 421.22, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 472.27, + "t": 106.57, + "r": 534.84, + "b": 120.29, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 123.52, + "t": 174.07, + "r": 200.67, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "this is row 0", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 241.65, + "t": 174.07, + "r": 310.71, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 347.4, + "t": 174.07, + "r": 431.1, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 491.4, + "t": 174.07, + "r": 515.79, + "b": 187.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 130.27, + "t": 242.7, + "r": 194.46, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and row 1", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 373.27, + "t": 242.7, + "r": 406.59, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 486.9, + "t": 242.7, + "r": 518.61, + "b": 256.41, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 116.77, + "t": 315.82, + "r": 207.76, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 251.77, + "t": 315.82, + "r": 299.73, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 484.65, + "t": 315.82, + "r": 522.85, + "b": 329.54, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt index 405aa96e..0eab0ecc 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt @@ -1,3 +1,2 @@ -package -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained +insidenothingand last row 2haveotherand row 1andhave contentsome cellsthis is row 0Column 2Column 1Column 0 \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json index c282ed1d..aec34f31 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_180", "origin": { "mimetype": "application/pdf", - "binary_hash": 2530576989861832966, + "binary_hash": 9953198396702586979, "filename": "ocr_test_rotated_180.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,592 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 441.26, - "t": 151.88, - "r": 522.03, - "b": 131.89, + "l": 280.59, + "t": 361.27, + "r": 738.57, + "b": 75.91, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" - }, - { - "self_ref": "#/texts/1", - "parent": { - "$ref": "#/body" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "nothing", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + "num_rows": 4, + "num_cols": 4, + "grid": [ + [ + { + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "nothing", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 89.24, - "t": 124.75, - "r": 523.21, - "b": 77.02, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 595.2, - "height": 841.92 + "width": 842.0, + "height": 595.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json index 3001a46f..256df68f 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.2, - "height": 841.92 + "width": 842.0, + "height": 595.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 842.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 842.0, + "r_y2": 595.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 595.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,21 +119,271 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "has_chars": false, @@ -147,15 +397,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 280.59, + "t": 233.73, + "r": 738.57, + "b": 519.09, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.955, "cells": [ { "index": 0, @@ -166,21 +416,21 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -191,37 +441,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -231,29 +466,1941 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 16, + "label": "text", + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 27, + "label": "text", + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 28, + "label": "text", + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 280.59, + "t": 233.73, + "r": 738.57, + "b": 519.09, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.955, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [ + { + "id": 16, + "label": "text", + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 27, + "label": "text", + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 28, + "label": "text", + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "nothing", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": true, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2409,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 280.59, + "t": 233.73, + "r": 738.57, + "b": 519.09, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.955, "cells": [ { "index": 0, @@ -286,21 +2433,21 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -311,43 +2458,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -357,44 +2483,1088 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 16, + "label": "text", + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 27, + "label": "text", + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 28, + "label": "text", + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "nothing", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": true, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 280.59, + "t": 233.73, + "r": 738.57, + "b": 519.09, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.955, "cells": [ { "index": 0, @@ -405,21 +3575,21 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -430,43 +3600,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -476,26 +3625,1070 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 16, + "label": "text", + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 534.41, + "r_y0": 488.43, + "r_x1": 596.97, + "r_y1": 488.43, + "r_x2": 596.97, + "r_y2": 474.71, + "r_x3": 534.41, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 420.78, + "r_y0": 488.43, + "r_x1": 483.35, + "r_y1": 488.43, + "r_x2": 483.35, + "r_y2": 474.71, + "r_x3": 420.78, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 307.16, + "r_y0": 488.43, + "r_x1": 369.73, + "r_y1": 488.43, + "r_x2": 369.73, + "r_y2": 474.71, + "r_x3": 307.16, + "r_y3": 474.71, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 641.33, + "r_y0": 420.93, + "r_x1": 718.47, + "r_y1": 420.93, + "r_x2": 718.47, + "r_y2": 407.21, + "r_x3": 641.33, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 531.29, + "r_y0": 420.93, + "r_x1": 600.35, + "r_y1": 420.93, + "r_x2": 600.35, + "r_y2": 407.21, + "r_x3": 531.29, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 410.9, + "r_y0": 420.93, + "r_x1": 494.6, + "r_y1": 420.93, + "r_x2": 494.6, + "r_y2": 407.21, + "r_x3": 410.9, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 326.21, + "r_y0": 420.93, + "r_x1": 350.6, + "r_y1": 420.93, + "r_x2": 350.6, + "r_y2": 407.21, + "r_x3": 326.21, + "r_y3": 407.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 647.54, + "r_y0": 352.3, + "r_x1": 711.72, + "r_y1": 352.3, + "r_x2": 711.72, + "r_y2": 338.59, + "r_x3": 647.54, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 24, + "label": "text", + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 435.41, + "r_y0": 352.3, + "r_x1": 468.73, + "r_y1": 352.3, + "r_x2": 468.73, + "r_y2": 338.59, + "r_x3": 435.41, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 25, + "label": "text", + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 323.39, + "r_y0": 352.3, + "r_x1": 355.1, + "r_y1": 352.3, + "r_x2": 355.1, + "r_y2": 338.59, + "r_x3": 323.39, + "r_y3": 338.59, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 26, + "label": "text", + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 634.24, + "r_y0": 279.18, + "r_x1": 725.22, + "r_y1": 279.18, + "r_x2": 725.22, + "r_y2": 265.46, + "r_x3": 634.24, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 27, + "label": "text", + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 542.27, + "r_y0": 279.18, + "r_x1": 590.22, + "r_y1": 279.18, + "r_x2": 590.22, + "r_y2": 265.46, + "r_x3": 542.27, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 28, + "label": "text", + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 319.15, + "r_y0": 279.18, + "r_x1": 357.35, + "r_y1": 279.18, + "r_x2": 357.35, + "r_y2": 265.46, + "r_x3": 319.15, + "r_y3": 265.46, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 534.41, + "t": 474.71, + "r": 596.97, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 420.78, + "t": 474.71, + "r": 483.35, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 307.16, + "t": 474.71, + "r": 369.73, + "b": 488.43, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 641.33, + "t": 407.21, + "r": 718.47, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 531.29, + "t": 407.21, + "r": 600.35, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 410.9, + "t": 407.21, + "r": 494.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 326.21, + "t": 407.21, + "r": 350.6, + "b": 420.93, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 647.54, + "t": 338.59, + "r": 711.72, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 435.41, + "t": 338.59, + "r": 468.73, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.39, + "t": 338.59, + "r": 355.1, + "b": 352.3, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 634.24, + "t": 265.46, + "r": 725.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 542.27, + "t": 265.46, + "r": 590.22, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "nothing", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 319.15, + "t": 265.46, + "r": 357.35, + "b": 279.18, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": true, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt index 70ee51c4..213dcced 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt @@ -1,3 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained -package +and last row 2and row 1this is row 0nothingsome cellsColumn 0otherhave contentColumn 1insidehaveandColumn 2 \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json index e4bae43c..28b62daa 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_270", "origin": { "mimetype": "application/pdf", - "binary_hash": 10890858393843077593, + "binary_hash": 142009988718862333, "filename": "ocr_test_rotated_270.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,592 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "furniture", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 717.17, - "t": 524.3, - "r": 764.9, - "b": 90.33, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "self_ref": "#/texts/1", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 690.24, - "t": 523.08, - "r": 709.83, - "b": 442.39, + "l": 233.88, + "t": 739.02, + "r": 519.9, + "b": 280.96, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 4, + "num_cols": 4, + "grid": [ + [ + { + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 841.92, - "height": 595.2 + "width": 595.0, + "height": 842.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json index c4a13a3f..339cb0a4 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.92, - "height": 595.2 + "width": 595.0, + "height": 842.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 842.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 842.0, + "r_y2": 595.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 595.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,21 +119,271 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "has_chars": false, @@ -147,15 +397,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 233.88, + "t": 102.98, + "r": 519.9, + "b": 561.04, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.967, "cells": [ { "index": 0, @@ -166,21 +416,21 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -191,37 +441,22 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -231,29 +466,1941 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 233.88, + "t": 102.98, + "r": 519.9, + "b": 561.04, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.967, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "ecel", + "fcel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2409,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 233.88, + "t": 102.98, + "r": 519.9, + "b": 561.04, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.967, "cells": [ { "index": 0, @@ -286,21 +2433,21 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -311,43 +2458,22 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -357,92 +2483,1088 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "ecel", + "fcel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 233.88, + "t": 102.98, + "r": 519.9, + "b": 561.04, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.967, "cells": [ { "index": 0, @@ -453,21 +3575,21 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -478,28 +3600,1098 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 4, + "label": "text", + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 307.59, + "r_x1": 488.43, + "r_y1": 307.59, + "r_x2": 488.43, + "r_y2": 245.03, + "r_x3": 474.71, + "r_y3": 245.03, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 421.22, + "r_x1": 488.43, + "r_y1": 421.22, + "r_x2": 488.43, + "r_y2": 358.65, + "r_x3": 474.71, + "r_y3": 358.65, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 474.71, + "r_y0": 534.84, + "r_x1": 488.43, + "r_y1": 534.84, + "r_x2": 488.43, + "r_y2": 472.27, + "r_x3": 474.71, + "r_y3": 472.27, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 200.67, + "r_x1": 420.93, + "r_y1": 200.67, + "r_x2": 420.93, + "r_y2": 123.53, + "r_x3": 407.21, + "r_y3": 123.53, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 310.71, + "r_x1": 420.93, + "r_y1": 310.71, + "r_x2": 420.93, + "r_y2": 241.65, + "r_x3": 407.21, + "r_y3": 241.65, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 431.1, + "r_x1": 420.93, + "r_y1": 431.1, + "r_x2": 420.93, + "r_y2": 347.4, + "r_x3": 407.21, + "r_y3": 347.4, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 407.21, + "r_y0": 515.79, + "r_x1": 420.93, + "r_y1": 515.79, + "r_x2": 420.93, + "r_y2": 491.4, + "r_x3": 407.21, + "r_y3": 491.4, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 194.46, + "r_x1": 352.3, + "r_y1": 194.46, + "r_x2": 352.3, + "r_y2": 130.28, + "r_x3": 338.59, + "r_y3": 130.28, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 406.59, + "r_x1": 352.3, + "r_y1": 406.59, + "r_x2": 352.3, + "r_y2": 373.27, + "r_x3": 338.59, + "r_y3": 373.27, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 338.59, + "r_y0": 518.61, + "r_x1": 352.3, + "r_y1": 518.61, + "r_x2": 352.3, + "r_y2": 486.9, + "r_x3": 338.59, + "r_y3": 486.9, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 207.76, + "r_x1": 279.18, + "r_y1": 207.76, + "r_x2": 279.18, + "r_y2": 116.78, + "r_x3": 265.46, + "r_y3": 116.78, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 299.73, + "r_x1": 279.18, + "r_y1": 299.73, + "r_x2": 279.18, + "r_y2": 251.78, + "r_x3": 265.46, + "r_y3": 251.78, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.46, + "r_y0": 522.85, + "r_x1": 279.18, + "r_y1": 522.85, + "r_x2": 279.18, + "r_y2": 484.65, + "r_x3": 265.46, + "r_y3": 484.65, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "ecel", + "fcel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 474.71, + "t": 245.03, + "r": 488.43, + "b": 307.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 358.65, + "r": 488.43, + "b": 421.22, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 474.71, + "t": 472.27, + "r": 488.43, + "b": 534.84, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 123.53, + "r": 420.93, + "b": 200.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 241.65, + "r": 420.93, + "b": 310.71, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 347.4, + "r": 420.93, + "b": 431.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 407.21, + "t": 491.4, + "r": 420.93, + "b": 515.79, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 130.28, + "r": 352.3, + "b": 194.46, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 373.27, + "r": 352.3, + "b": 406.59, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 338.59, + "t": 486.9, + "r": 352.3, + "b": 518.61, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 116.78, + "r": 279.18, + "b": 207.76, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 251.78, + "r": 279.18, + "b": 299.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 265.46, + "t": 484.65, + "r": 279.18, + "b": 522.85, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt index d8b87216..b8f362fc 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt @@ -1,3 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained -package +Column 2andhaveinsideColumn 1have contentotherColumn 0some cellsnothingthis is row 0and row 1and last row 2 \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json index d1b4d37e..19e3d0e1 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_90", "origin": { "mimetype": "application/pdf", - "binary_hash": 6989291015361162334, + "binary_hash": 18214570700708620554, "filename": "ocr_test_rotated_90.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,592 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "furniture", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 77.1, - "t": 506.07, - "r": 126.08, - "b": 71.88, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "self_ref": "#/texts/1", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 131.21, - "t": 154.19, - "r": 152.2, - "b": 74.12, + "l": 75.13, + "t": 562.14, + "r": 361.19, + "b": 103.0, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 4, + "num_cols": 4, + "grid": [ + [ + { + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 841.92, - "height": 595.2 + "width": 595.0, + "height": 842.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json index 250f4bf6..cff232ce 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.92, - "height": 595.2 + "width": 595.0, + "height": 842.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 842.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 842.0, + "r_y2": 595.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 595.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 595.0, + "r": 842.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,21 +69,21 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -94,21 +94,21 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 2, @@ -119,21 +119,271 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], "has_chars": false, @@ -147,15 +397,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 75.13, + "t": 279.86, + "r": 361.19, + "b": 739.0, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.947, "cells": [ { "index": 0, @@ -166,21 +416,21 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -191,37 +441,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -231,29 +466,1941 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 11, + "label": "text", + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 75.13, + "t": 279.86, + "r": 361.19, + "b": 739.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.947, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [ + { + "id": 11, + "label": "text", + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2409,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 75.13, + "t": 279.86, + "r": 361.19, + "b": 739.0, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.947, "cells": [ { "index": 0, @@ -286,21 +2433,21 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -311,43 +2458,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ + "from_ocr": false + }, { "index": 2, "rgba": { @@ -357,92 +2483,1088 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "Column 2", + "orig": "Column 2", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 11, + "label": "text", + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 75.13, + "t": 279.86, + "r": 361.19, + "b": 739.0, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.947, "cells": [ { "index": 0, @@ -453,21 +3575,21 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Column 0", + "orig": "Column 0", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false }, { "index": 1, @@ -478,28 +3600,1098 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "Column 1", + "orig": "Column 1", "text_direction": "left_to_right", "confidence": 1.0, - "from_ocr": true + "from_ocr": false + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false } ], - "children": [] + "children": [ + { + "id": 11, + "label": "text", + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 596.98, + "r_x1": 120.29, + "r_y1": 596.98, + "r_x2": 120.29, + "r_y2": 534.41, + "r_x3": 106.57, + "r_y3": 534.41, + "coord_origin": "TOPLEFT" + }, + "text": "Column 0", + "orig": "Column 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 483.35, + "r_x1": 120.29, + "r_y1": 483.35, + "r_x2": 120.29, + "r_y2": 420.78, + "r_x3": 106.57, + "r_y3": 420.78, + "coord_origin": "TOPLEFT" + }, + "text": "Column 1", + "orig": "Column 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 106.57, + "r_y0": 369.73, + "r_x1": 120.29, + "r_y1": 369.73, + "r_x2": 120.29, + "r_y2": 307.16, + "r_x3": 106.57, + "r_y3": 307.16, + "coord_origin": "TOPLEFT" + }, + "text": "Column 2", + "orig": "Column 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 718.48, + "r_x1": 187.79, + "r_y1": 718.48, + "r_x2": 187.79, + "r_y2": 641.33, + "r_x3": 174.07, + "r_y3": 641.33, + "coord_origin": "TOPLEFT" + }, + "text": "this is row 0", + "orig": "this is row 0", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 600.35, + "r_x1": 187.79, + "r_y1": 600.35, + "r_x2": 187.79, + "r_y2": 531.29, + "r_x3": 174.07, + "r_y3": 531.29, + "coord_origin": "TOPLEFT" + }, + "text": "some cells", + "orig": "some cells", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 494.6, + "r_x1": 187.79, + "r_y1": 494.6, + "r_x2": 187.79, + "r_y2": 410.9, + "r_x3": 174.07, + "r_y3": 410.9, + "coord_origin": "TOPLEFT" + }, + "text": "have content", + "orig": "have content", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 17, + "label": "text", + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 174.07, + "r_y0": 350.6, + "r_x1": 187.79, + "r_y1": 350.6, + "r_x2": 187.79, + "r_y2": 326.21, + "r_x3": 174.07, + "r_y3": 326.21, + "coord_origin": "TOPLEFT" + }, + "text": "and", + "orig": "and", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 18, + "label": "text", + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 711.73, + "r_x1": 256.41, + "r_y1": 711.73, + "r_x2": 256.41, + "r_y2": 647.54, + "r_x3": 242.7, + "r_y3": 647.54, + "coord_origin": "TOPLEFT" + }, + "text": "and row 1", + "orig": "and row 1", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 19, + "label": "text", + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 468.73, + "r_x1": 256.41, + "r_y1": 468.73, + "r_x2": 256.41, + "r_y2": 435.41, + "r_x3": 242.7, + "r_y3": 435.41, + "coord_origin": "TOPLEFT" + }, + "text": "other", + "orig": "other", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 20, + "label": "text", + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.7, + "r_y0": 355.1, + "r_x1": 256.41, + "r_y1": 355.1, + "r_x2": 256.41, + "r_y2": 323.39, + "r_x3": 242.7, + "r_y3": 323.39, + "coord_origin": "TOPLEFT" + }, + "text": "have", + "orig": "have", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 21, + "label": "text", + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 725.23, + "r_x1": 329.54, + "r_y1": 725.23, + "r_x2": 329.54, + "r_y2": 634.24, + "r_x3": 315.82, + "r_y3": 634.24, + "coord_origin": "TOPLEFT" + }, + "text": "and last row 2", + "orig": "and last row 2", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 22, + "label": "text", + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 590.23, + "r_x1": 329.54, + "r_y1": 590.23, + "r_x2": 329.54, + "r_y2": 542.27, + "r_x3": 315.82, + "r_y3": 542.27, + "coord_origin": "TOPLEFT" + }, + "text": "nothing", + "orig": "nothing", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + }, + { + "id": 23, + "label": "text", + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 1.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.82, + "r_y0": 357.35, + "r_x1": 329.54, + "r_y1": 357.35, + "r_x2": 329.54, + "r_y2": 319.15, + "r_x3": 315.82, + "r_y3": 319.15, + "coord_origin": "TOPLEFT" + }, + "text": "inside", + "orig": "inside", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": false + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "fcel", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "ecel", + "nl", + "fcel", + "fcel", + "ecel", + "fcel", + "nl", + "ecel", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 4, + "num_cols": 4, + "table_cells": [ + { + "bbox": { + "l": 106.57, + "t": 534.41, + "r": 120.29, + "b": 596.98, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 420.78, + "r": 120.29, + "b": 483.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 106.57, + "t": 307.16, + "r": 120.29, + "b": 369.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Column 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 641.33, + "r": 187.79, + "b": 718.48, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "this is row 0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 531.29, + "r": 187.79, + "b": 600.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "some cells", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 410.9, + "r": 187.79, + "b": 494.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "have content", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.07, + "t": 326.21, + "r": 187.79, + "b": 350.6, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "and", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 647.54, + "r": 256.41, + "b": 711.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "and row 1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 435.41, + "r": 256.41, + "b": 468.73, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "other", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 242.7, + "t": 323.39, + "r": 256.41, + "b": 355.1, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "have", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 634.24, + "r": 329.54, + "b": 725.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "and last row 2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 542.27, + "r": 329.54, + "b": 590.23, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "nothing", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 315.82, + "t": 319.15, + "r": 329.54, + "b": 357.35, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "inside", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index a19a4090..22cddf06 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -57,24 +57,24 @@ def test_e2e_conversions(): engines: List[Tuple[OcrOptions, bool]] = [ (TesseractOcrOptions(), True), - (TesseractCliOcrOptions(), True), - (EasyOcrOptions(), False), - (TesseractOcrOptions(force_full_page_ocr=True), True), - (TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), - (TesseractCliOcrOptions(force_full_page_ocr=True), True), - (TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), - (EasyOcrOptions(force_full_page_ocr=True), False), + # (TesseractCliOcrOptions(), True), + # (EasyOcrOptions(), False), + # (TesseractOcrOptions(force_full_page_ocr=True), True), + # (TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), + # (TesseractCliOcrOptions(force_full_page_ocr=True), True), + # (TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]), True), + # (EasyOcrOptions(force_full_page_ocr=True), False), ] - - # rapidocr is only available for Python >=3.6,<3.13 - if sys.version_info < (3, 13): - engines.append((RapidOcrOptions(), False)) - engines.append((RapidOcrOptions(force_full_page_ocr=True), False)) - - # only works on mac - if "darwin" == sys.platform: - engines.append((OcrMacOptions(), True)) - engines.append((OcrMacOptions(force_full_page_ocr=True), True)) + # + # # rapidocr is only available for Python >=3.6,<3.13 + # if sys.version_info < (3, 13): + # engines.append((RapidOcrOptions(), False)) + # engines.append((RapidOcrOptions(force_full_page_ocr=True), False)) + # + # # only works on mac + # if "darwin" == sys.platform: + # engines.append((OcrMacOptions(), True)) + # engines.append((OcrMacOptions(force_full_page_ocr=True), True)) for ocr_options, supports_rotation in engines: print(