diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index 05a86f31..77609aae 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -2,6 +2,7 @@ import copy import logging import warnings from collections.abc import Iterable +from copy import deepcopy from pathlib import Path from typing import Optional @@ -19,6 +20,7 @@ from docling.models.base_model import BasePageModel from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device from docling.utils.layout_postprocessor import LayoutPostprocessor +from docling.utils.orientation import detect_orientation, rotate_bounding_box from docling.utils.profiling import TimeRecorder from docling.utils.visualization import draw_clusters @@ -157,7 +159,9 @@ class LayoutModel(BasePageModel): assert page.size is not None page_image = page.get_image(scale=1.0) assert page_image is not None - + page_orientation = detect_orientation(page.cells) + if page_orientation: + page_image = page_image.rotate(-page_orientation, expand=True) clusters = [] for ix, pred_item in enumerate( self.layout_predictor.predict(page_image) @@ -168,11 +172,16 @@ class LayoutModel(BasePageModel): .replace(" ", "_") .replace("-", "_") ) # Temporary, until docling-ibm-model uses docling-core types + bbox = BoundingBox.model_validate(pred_item) + if page_orientation: + bbox = rotate_bounding_box( + bbox, page_orientation, page_image.size + ).to_bounding_box() cluster = Cluster( id=ix, label=label, confidence=pred_item["confidence"], - bbox=BoundingBox.model_validate(pred_item), + bbox=bbox, cells=[], ) clusters.append(cluster) diff --git a/docling/models/ocr_mac_model.py b/docling/models/ocr_mac_model.py index 609b1240..6f90af23 100644 --- a/docling/models/ocr_mac_model.py +++ b/docling/models/ocr_mac_model.py @@ -107,10 +107,10 @@ class OcrMacModel(BaseOcrModel): x2 = x1 + w * im_width y1 = y2 - h * im_height - left = x1 / self.scale - top = y1 / self.scale - right = x2 / self.scale - bottom = y2 / self.scale + left = x1 / self.scale + ocr_rect.l + top = y1 / self.scale + ocr_rect.t + right = x2 / self.scale + ocr_rect.l + bottom = y2 / self.scale + ocr_rect.t cells.append( TextCell( diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index f5f2cb14..2904b6ed 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -1,8 +1,7 @@ import copy import warnings -from collections.abc import Iterable from pathlib import Path -from typing import Optional +from typing import Iterable, Optional, Tuple, cast import numpy from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell @@ -11,6 +10,7 @@ from docling_core.types.doc.page import ( TextCellUnit, ) from PIL import ImageDraw +from PIL.Image import Image from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import Page, Table, TableStructurePrediction @@ -23,6 +23,7 @@ from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.models.utils.hf_model_download import download_hf_model from docling.utils.accelerator_utils import decide_device +from docling.utils.orientation import detect_orientation, rotate_bounding_box from docling.utils.profiling import TimeRecorder @@ -30,6 +31,8 @@ class TableStructureModel(BasePageModel): _model_repo_folder = "ds4sd--docling-models" _model_path = "model_artifacts/tableformer" + _table_labels = {DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX} + def __init__( self, enabled: bool, @@ -186,31 +189,48 @@ class TableStructureModel(BasePageModel): page.predictions.tablestructure = ( TableStructurePrediction() ) # dummy - - in_tables = [ - ( - cluster, - [ - round(cluster.bbox.l) * self.scale, - round(cluster.bbox.t) * self.scale, - round(cluster.bbox.r) * self.scale, - round(cluster.bbox.b) * self.scale, - ], - ) + cells_orientation = detect_orientation(page.cells) + # Keep only table bboxes + in_tables_clusters = [ + cluster for cluster in page.predictions.layout.clusters - if cluster.label - in [DocItemLabel.TABLE, DocItemLabel.DOCUMENT_INDEX] + if cluster.label in self._table_labels ] - if not len(in_tables): + + if not len(in_tables_clusters): yield page continue - + # Rotate and scale table image + page_im = cast(Image, page.get_image()) + scaled_page_im: Image = cast( + Image, page.get_image(scale=self.scale) + ) + if cells_orientation: + scaled_page_im = scaled_page_im.rotate( + -cells_orientation, expand=True + ) page_input = { - "width": page.size.width * self.scale, - "height": page.size.height * self.scale, - "image": numpy.asarray(page.get_image(scale=self.scale)), + "width": scaled_page_im.size[0], + "height": scaled_page_im.size[1], + "image": numpy.asarray(scaled_page_im), } - + # Rotate and scale table cells + in_tables = [ + ( + c, + [ + round(x) * self.scale + for x in _rotate_bbox( + c.bbox, + orientation=-cells_orientation, + im_size=page_im.size, + ) + .to_top_left_origin(page_im.size[1]) + .as_tuple() + ], + ) + for c in in_tables_clusters + ] table_clusters, table_bboxes = zip(*in_tables) if len(table_bboxes): @@ -238,11 +258,16 @@ class TableStructureModel(BasePageModel): scale=self.scale ) ) + new_bbox = _rotate_bbox( + new_cell.to_bounding_box(), + orientation=cells_orientation, + im_size=scaled_page_im.size, + ).model_dump() tokens.append( { "id": new_cell.index, "text": new_cell.text, - "bbox": new_cell.rect.to_bounding_box().model_dump(), + "bbox": new_bbox, } ) page_input["tokens"] = tokens @@ -302,3 +327,11 @@ class TableStructureModel(BasePageModel): ) yield page + + +def _rotate_bbox( + bbox: BoundingBox, *, orientation: int, im_size: Tuple[int, int] +) -> BoundingBox: + if orientation: + return rotate_bounding_box(bbox, orientation, im_size).to_bounding_box() + return bbox diff --git a/docling/utils/ocr_utils.py b/docling/utils/ocr_utils.py index c4bc9695..bf7b510d 100644 --- a/docling/utils/ocr_utils.py +++ b/docling/utils/ocr_utils.py @@ -3,7 +3,10 @@ from typing import Optional, Tuple from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle -from docling.utils.orientation import CLIPPED_ORIENTATIONS, rotate_bounding_box +from docling.utils.orientation import ( + CLIPPED_ORIENTATIONS, + rotate_bounding_box, +) def map_tesseract_script(script: str) -> str: @@ -40,7 +43,9 @@ def tesseract_box_to_bounding_rectangle( orientation: int, im_size: Tuple[int, int], ) -> BoundingRectangle: - # box is in the top, left, height, width format, top left coordinates + # bbox is in the top, left, height, width format, top left coordinates + # We detected the tesseract on the document rotated with minus orientation, we have + # to apply an orientation angle rect = rotate_bounding_box(bbox, angle=orientation, im_size=im_size) rect = BoundingRectangle( r_x0=rect.r_x0 / scale, @@ -51,7 +56,7 @@ def tesseract_box_to_bounding_rectangle( r_y2=rect.r_y2 / scale, r_x3=rect.r_x3 / scale, r_y3=rect.r_y3 / scale, - coord_origin=CoordOrigin.TOPLEFT, + coord_origin=rect.coord_origin, ) if original_offset is not None: if original_offset.coord_origin is not CoordOrigin.TOPLEFT: diff --git a/docling/utils/orientation.py b/docling/utils/orientation.py index 29c02ff7..eb118d13 100644 --- a/docling/utils/orientation.py +++ b/docling/utils/orientation.py @@ -1,11 +1,24 @@ +from collections import Counter +from operator import itemgetter from typing import Tuple from docling_core.types.doc import BoundingBox, CoordOrigin -from docling_core.types.doc.page import BoundingRectangle +from docling_core.types.doc.page import BoundingRectangle, TextCell CLIPPED_ORIENTATIONS = [0, 90, 180, 270] +def _clipped_orientation(angle: float) -> int: + return min((abs(angle - o) % 360, o) for o in CLIPPED_ORIENTATIONS)[1] + + +def detect_orientation(cells: list[TextCell]) -> int: + if not cells: + return 0 + orientation_counter = Counter(_clipped_orientation(c.rect.angle_360) for c in cells) + return max(orientation_counter.items(), key=itemgetter(1))[0] + + def rotate_bounding_box( bbox: BoundingBox, angle: int, im_size: Tuple[int, int] ) -> BoundingRectangle: diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json index dd51e390..58701d5d 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json @@ -213,10 +213,10 @@ "prov": [ { "bbox": [ - 139.66741943359375, + 139.66746520996094, 322.5054626464844, - 475.00927734375, - 454.45458984375 + 475.0093078613281, + 454.4546203613281 ], "page": 1, "span": [ diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json index 3010fbb6..114cbf31 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json @@ -2705,7 +2705,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -2745,7 +2745,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, @@ -2785,7 +2785,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -3155,7 +3155,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -3339,9 +3339,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -7846,7 +7846,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -8243,9 +8243,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -13641,7 +13641,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -13687,7 +13687,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, @@ -13733,7 +13733,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -14121,7 +14121,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -14311,9 +14311,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -19701,7 +19701,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -20116,7 +20116,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806433916091919, + "confidence": 0.9806435108184814, "cells": [ { "index": 2, @@ -20504,7 +20504,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591909050941467, + "confidence": 0.9591910243034363, "cells": [ { "index": 15, @@ -20694,9 +20694,9 @@ "id": 0, "label": "table", "bbox": { - "l": 139.66741943359375, - "t": 337.54541015625, - "r": 475.00927734375, + "l": 139.66746520996094, + "t": 337.5453796386719, + "r": 475.0093078613281, "b": 469.4945373535156, "coord_origin": "TOPLEFT" }, @@ -26084,7 +26084,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589294195175171, + "confidence": 0.9589295387268066, "cells": [ { "index": 91, @@ -26499,7 +26499,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373534917831421, + "confidence": 0.9373531937599182, "cells": [ { "index": 0, @@ -26545,7 +26545,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858680725097656, + "confidence": 0.8858677744865417, "cells": [ { "index": 1, diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt index 5682a134..76fe886d 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt @@ -1,2 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt index b00cc668..20604b3e 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.doctags.txt @@ -1,3 +1,8 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package + + +Vertically mergedOther merged columnYet another column +valueSome other valueYet another value +valueSome other valueYet another value +
\ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test.json index 8dbfff1f..81ad10e7 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.json @@ -27,53 +27,321 @@ "file-info": { "filename": "ocr_test.pdf", "filename-prov": null, - "document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61", + "document-hash": "0f391d12850f72bb91897f7f3bebfd4a0a8357e2a883ac1f664e32342c04e418", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3", + "hash": "32f328168da3f69890a725c1168799f9ff7337249e98b1f36c12965551477be5", "model": "default", "page": 1 } ] }, "main-text": [ + { + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + } + ], + "figures": [], + "tables": [ { "prov": [ { "bbox": [ - 69.6796630536824, - 689.0124221922704, - 504.8720051760782, - 764.9216921155637 + 69.04969024658203, + 277.41973876953125, + 551.0990600585938, + 524.3504486083984 ], "page": 1, "span": [ 0, - 94 + 0 ], "__ref_s3_data": null } ], - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", - "type": "paragraph", + "text": "", + "type": "table", "payload": null, - "name": "Text", - "font": null + "#-cols": 3, + "#-rows": 3, + "data": [ + [ + { + "bbox": [ + 97.33333333333333, + 105.66666666666666, + 190.0, + 126.33333333333334 + ], + "spans": [ + [ + 0, + 0 + ] + ], + "text": "Vertically merged", + "type": "col_header", + "col": 0, + "col-header": true, + "col-span": [ + 0, + 1 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 232.66666666666666, + 105.66666666666666, + 364.0, + 126.33333333333334 + ], + "spans": [ + [ + 0, + 1 + ] + ], + "text": "Other merged column", + "type": "col_header", + "col": 1, + "col-header": true, + "col-span": [ + 1, + 2 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 406.3333333333333, + 105.66666666666666, + 518.3333333333333, + 121.66666666666666 + ], + "spans": [ + [ + 0, + 2 + ] + ], + "text": "Yet another column", + "type": "col_header", + "col": 2, + "col-header": true, + "col-span": [ + 2, + 3 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + } + ], + [ + { + "bbox": [ + 121.66666666666667, + 204.33333333333334, + 168.66666666666666, + 220.0 + ], + "spans": [ + [ + 1, + 0 + ] + ], + "text": "value", + "type": "body", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 247.0, + 188.33333333333331, + 349.6666666666667, + 204.33333333333334 + ], + "spans": [ + [ + 1, + 1 + ] + ], + "text": "Some other value", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 408.3333333333333, + 188.33333333333331, + 514.0, + 204.33333333333334 + ], + "spans": [ + [ + 1, + 2 + ] + ], + "text": "Yet another value", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + } + ], + [ + { + "bbox": [ + 121.66666666666667, + 284.0, + 168.66666666666666, + 300.0 + ], + "spans": [ + [ + 2, + 0 + ] + ], + "text": "value", + "type": "body", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": [ + 247.0, + 268.0, + 349.6666666666667, + 284.0 + ], + "spans": [ + [ + 2, + 1 + ] + ], + "text": "Some other value", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": [ + 408.3333333333333, + 268.0, + 514.0, + 284.0 + ], + "spans": [ + [ + 2, + 2 + ] + ], + "text": "Yet another value", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + } + ] + ], + "model": null, + "bounding-box": null } ], - "figures": [], - "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 841.9216918945312, + "height": 612.0, "page": 1, - "width": 595.201171875 + "width": 792.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test.md index 42896546..e3d7c0b8 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.md +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.md @@ -1 +1,4 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file +| Vertically merged | Other merged column | Yet another column | +|---------------------|-----------------------|----------------------| +| value | Some other value | Yet another value | +| value | Some other value | Yet another value | \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json index b53b75aa..f9ed59fd 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.201171875, - "height": 841.9216918945312 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 69.04969024658203, + "t": 87.64955139160156, + "r": 551.0990600585938, + "b": 334.58026123046875, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -191,20 +516,20 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -216,29 +541,2247 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33333333333334, + "b": 157.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.66666666666663, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.3333333333333, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.66666666666663, + "r": 324.3333333333333, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.3333333333333, + "t": 137.0, + "r": 499.3333333333333, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 220.0, + "r": 487.6666666666667, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 299.66666666666663, + "r": 487.6666666666667, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 69.04969024658203, + "t": 87.64955139160156, + "r": 551.0990600585938, + "b": 334.58026123046875, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9790865778923035, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33333333333334, + "b": 157.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.66666666666663, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.3333333333333, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.66666666666663, + "r": 324.3333333333333, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.3333333333333, + "t": 137.0, + "r": 499.3333333333333, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 220.0, + "r": 487.6666666666667, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 299.66666666666663, + "r": 487.6666666666667, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -247,20 +2790,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 69.04969024658203, + "t": 87.64955139160156, + "r": 551.0990600585938, + "b": 334.58026123046875, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -271,20 +2814,20 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -296,20 +2839,20 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -321,44 +2864,1199 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33333333333334, + "b": 157.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.66666666666663, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.3333333333333, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.66666666666663, + "r": 324.3333333333333, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.3333333333333, + "t": 137.0, + "r": 499.3333333333333, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 220.0, + "r": 487.6666666666667, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 299.66666666666663, + "r": 487.6666666666667, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.6796630536824, - "t": 76.99999977896756, - "r": 504.8720051760782, - "b": 152.90926970226084, + "l": 69.04969024658203, + "t": 87.64955139160156, + "r": 551.0990600585938, + "b": 334.58026123046875, "coord_origin": "TOPLEFT" }, - "confidence": 0.9715733528137207, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -369,20 +4067,20 @@ "a": 255 }, "rect": { - "r_x0": 73.34702132031646, - "r_y0": 97.99999977896755, - "r_x1": 503.64955224479564, - "r_y1": 97.99999977896755, - "r_x2": 503.64955224479564, - "r_y2": 76.99999977896756, - "r_x3": 73.34702132031646, - "r_y3": 76.99999977896756, + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -394,20 +4092,20 @@ "a": 255 }, "rect": { - "r_x0": 69.6796630536824, - "r_y0": 124.83139494707741, - "r_x1": 504.8720051760782, - "r_y1": 124.83139494707741, - "r_x2": 504.8720051760782, - "r_y2": 104.00000011573796, - "r_x3": 69.6796630536824, - "r_y3": 104.00000011573796, + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -419,26 +4117,1181 @@ "a": 255 }, "rect": { - "r_x0": 71.84193505100733, - "r_y0": 152.90926970226084, - "r_x1": 153.088934155825, - "r_y1": 152.90926970226084, - "r_x2": 153.088934155825, - "r_y2": 129.797125232046, - "r_x3": 71.84193505100733, - "r_y3": 129.797125232046, + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33333333333333, + "r_y0": 126.33333333333334, + "r_x1": 190.0, + "r_y1": 126.33333333333334, + "r_x2": 190.0, + "r_y2": 105.66666666666666, + "r_x3": 97.33333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33333333333334, + "b": 157.66666666666669, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.66666666666669, + "r_x1": 182.33333333333334, + "r_y1": 157.66666666666669, + "r_x2": 182.33333333333334, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 220.0, + "r_x1": 168.66666666666666, + "r_y1": 220.0, + "r_x2": 168.66666666666666, + "r_y2": 204.33333333333334, + "r_x3": 121.66666666666667, + "r_y3": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666667, + "r_y0": 300.0, + "r_x1": 168.66666666666666, + "r_y1": 300.0, + "r_x2": 168.66666666666666, + "r_y2": 284.0, + "r_x3": 121.66666666666667, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.66666666666666, + "r_y0": 126.33333333333334, + "r_x1": 364.0, + "r_y1": 126.33333333333334, + "r_x2": 364.0, + "r_y2": 105.66666666666666, + "r_x3": 232.66666666666666, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.66666666666663, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.66666666666663, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.66666666666663, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33333333333334, + "r_x1": 349.6666666666667, + "r_y1": 204.33333333333334, + "r_x2": 349.6666666666667, + "r_y2": 188.33333333333331, + "r_x3": 247.0, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.3333333333333, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.3333333333333, + "r_y1": 236.0, + "r_x2": 324.3333333333333, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.6666666666667, + "r_y1": 284.0, + "r_x2": 349.6666666666667, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.66666666666663, + "r": 324.3333333333333, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33333333333337, + "r_x1": 324.3333333333333, + "r_y1": 315.33333333333337, + "r_x2": 324.3333333333333, + "r_y2": 299.66666666666663, + "r_x3": 277.0, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.3333333333333, + "r_y0": 121.66666666666666, + "r_x1": 518.3333333333333, + "r_y1": 121.66666666666666, + "r_x2": 518.3333333333333, + "r_y2": 105.66666666666666, + "r_x3": 406.3333333333333, + "r_y3": 105.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.3333333333333, + "t": 137.0, + "r": 499.3333333333333, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.3333333333333, + "r_y0": 153.0, + "r_x1": 499.3333333333333, + "r_y1": 153.0, + "r_x2": 499.3333333333333, + "r_y2": 137.0, + "r_x3": 429.3333333333333, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 204.33333333333334, + "r_x1": 514.0, + "r_y1": 204.33333333333334, + "r_x2": 514.0, + "r_y2": 188.33333333333331, + "r_x3": 408.3333333333333, + "r_y3": 188.33333333333331, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 220.0, + "r": 487.6666666666667, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 236.0, + "r_x1": 487.6666666666667, + "r_y1": 236.0, + "r_x2": 487.6666666666667, + "r_y2": 220.0, + "r_x3": 440.6666666666667, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.3333333333333, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.3333333333333, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.6666666666667, + "t": 299.66666666666663, + "r": 487.6666666666667, + "b": 315.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.6666666666667, + "r_y0": 315.33333333333337, + "r_x1": 487.6666666666667, + "r_y1": 315.33333333333337, + "r_x2": 487.6666666666667, + "r_y2": 299.66666666666663, + "r_x3": 440.6666666666667, + "r_y3": 299.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333333, + "t": 105.66666666666666, + "r": 190.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 204.33333333333334, + "r": 168.66666666666666, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666667, + "t": 284.0, + "r": 168.66666666666666, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.66666666666666, + "t": 105.66666666666666, + "r": 364.0, + "b": 126.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33333333333331, + "r": 349.6666666666667, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666666, + "r": 518.3333333333333, + "b": 121.66666666666666, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.33333333333331, + "r": 514.0, + "b": 204.33333333333334, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.doctags.txt deleted file mode 100644 index 0b7a3a14..00000000 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.doctags.txt +++ /dev/null @@ -1,3 +0,0 @@ - -package - \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.json deleted file mode 100644 index 128a8527..00000000 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.json +++ /dev/null @@ -1 +0,0 @@ -{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [131.21306574279092, 74.12495603322407, 152.19606490864376, 154.19400205373182], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.md deleted file mode 100644 index 597acc76..00000000 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.md +++ /dev/null @@ -1 +0,0 @@ -package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.pages.json deleted file mode 100644 index fdc46eda..00000000 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated.pages.json +++ /dev/null @@ -1 +0,0 @@ -[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt index 029be08d..4de7af73 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.doctags.txt @@ -1,4 +1,8 @@ -package -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained + + +Vertically mergedOther merged columnYet another column +valueSome other valueYet another value +valueSome other valueYet another value +
\ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json index 8de137d4..b02a5df9 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.json @@ -27,13 +27,13 @@ "file-info": { "filename": "ocr_test_rotated_180.pdf", "filename-prov": null, - "document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982", + "document-hash": "361fa0fc8db9c3a973d316d08509ac78cc0e7f81dea94358319092640d439ca0", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49", + "hash": "ab89ee70d4aee0b8dc5ed72ad42e16e98a8ec9c2eea1e03d99b50c25bbc5a806", "model": "default", "page": 1 } @@ -41,62 +41,307 @@ }, "main-text": [ { - "prov": [ - { - "bbox": [ - 441.2561096985719, - 131.89488404865142, - 522.0347860494834, - 151.87873262042876 - ], - "page": 1, - "span": [ - 0, - 7 - ], - "__ref_s3_data": null - } - ], - "text": "package", - "type": "paragraph", - "payload": null, - "name": "Text", - "font": null - }, + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + } + ], + "figures": [], + "tables": [ { "prov": [ { "bbox": [ - 89.23887497045128, - 77.02339852098021, - 523.208764293368, - 124.75312428291147 + 240.90093994140625, + 87.64955139160156, + 722.950309753418, + 334.58026123046875 ], "page": 1, "span": [ 0, - 86 + 0 ], "__ref_s3_data": null } ], - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "type": "paragraph", + "text": "", + "type": "table", "payload": null, - "name": "Text", - "font": null + "#-cols": 3, + "#-rows": 3, + "data": [ + [ + { + "bbox": [ + 97.33333333333337, + 105.66666666666669, + 190.0, + 126.33333333333337 + ], + "spans": [ + [ + 0, + 0 + ] + ], + "text": "Vertically merged", + "type": "col_header", + "col": 0, + "col-header": true, + "col-span": [ + 0, + 1 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 232.33333333333326, + 105.66666666666669, + 363.6666666666667, + 126.33333333333337 + ], + "spans": [ + [ + 0, + 1 + ] + ], + "text": "Other merged column", + "type": "col_header", + "col": 1, + "col-header": true, + "col-span": [ + 1, + 2 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 406.3333333333333, + 105.66666666666669, + 518.0, + 121.66666666666663 + ], + "spans": [ + [ + 0, + 2 + ] + ], + "text": "Yet another column", + "type": "col_header", + "col": 2, + "col-header": true, + "col-span": [ + 2, + 3 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + } + ], + [ + { + "bbox": [ + 121.66666666666663, + 204.0, + 168.66666666666663, + 220.0 + ], + "spans": [ + [ + 1, + 0 + ] + ], + "text": "value", + "type": "body", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 247.0, + 188.0, + 349.6666666666667, + 204.0 + ], + "spans": [ + [ + 1, + 1 + ] + ], + "text": "Some other value", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + }, + { + "bbox": [ + 408.3333333333333, + 188.0, + 514.0, + 204.0 + ], + "spans": [ + [ + 1, + 2 + ] + ], + "text": "Yet another value", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 1, + "row-header": false, + "row-span": [ + 1, + 2 + ] + } + ], + [ + { + "bbox": [ + 121.66666666666663, + 284.0, + 168.66666666666663, + 300.0 + ], + "spans": [ + [ + 2, + 0 + ] + ], + "text": "value", + "type": "body", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": [ + 247.0, + 268.0, + 349.6666666666667, + 284.0 + ], + "spans": [ + [ + 2, + 1 + ] + ], + "text": "Some other value", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + }, + { + "bbox": [ + 408.3333333333333, + 268.0, + 514.0, + 284.0 + ], + "spans": [ + [ + 2, + 2 + ] + ], + "text": "Yet another value", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 2, + "row-header": false, + "row-span": [ + 2, + 3 + ] + } + ] + ], + "model": null, + "bounding-box": null } ], - "figures": [], - "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 841.9216918945312, + "height": 612.0, "page": 1, - "width": 595.201171875 + "width": 792.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.md index f5d50b5c..e3d7c0b8 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.md +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.md @@ -1,3 +1,4 @@ -package - -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained \ No newline at end of file +| Vertically merged | Other merged column | Yet another column | +|---------------------|-----------------------|----------------------| +| value | Some other value | Yet another value | +| value | Some other value | Yet another value | \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json index 962861d9..de743661 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_180.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.201171875, - "height": 841.9216918945312 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 240.90093994140625, + "t": 277.41973876953125, + "r": 722.950309753418, + "b": 524.3504486083984, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2247 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.66666666666663, + "r": 694.6666666666666, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33333333333337, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 392.0, + "r": 670.3333333333334, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 312.0, + "r": 670.3333333333334, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.3333333333333, + "t": 485.66666666666663, + "r": 559.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.6666666666667, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.6666666666667, + "r": 515.0, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33333333333337, + "r": 385.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.66666666666663, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.6666666666667, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 376.0, + "r": 351.33333333333337, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.6666666666667, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 296.6666666666667, + "r": 351.33333333333337, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 240.90093994140625, + "t": 277.41973876953125, + "r": 722.950309753418, + "b": 524.3504486083984, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9790865778923035, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.66666666666663, + "r": 694.6666666666666, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33333333333337, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 392.0, + "r": 670.3333333333334, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 312.0, + "r": 670.3333333333334, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.3333333333333, + "t": 485.66666666666663, + "r": 559.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.6666666666667, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.6666666666667, + "r": 515.0, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33333333333337, + "r": 385.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.66666666666663, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.6666666666667, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 376.0, + "r": 351.33333333333337, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.6666666666667, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 296.6666666666667, + "r": 351.33333333333337, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333337, + "t": 105.66666666666669, + "r": 190.0, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 204.0, + "r": 168.66666666666663, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 284.0, + "r": 168.66666666666663, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33333333333326, + "t": 105.66666666666669, + "r": 363.6666666666667, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.6666666666667, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666669, + "r": 518.0, + "b": 121.66666666666663, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2790,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 240.90093994140625, + "t": 277.41973876953125, + "r": 722.950309753418, + "b": 524.3504486083984, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -286,20 +2814,20 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -311,43 +2839,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,44 +2864,1199 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.66666666666663, + "r": 694.6666666666666, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33333333333337, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 392.0, + "r": 670.3333333333334, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 312.0, + "r": 670.3333333333334, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.3333333333333, + "t": 485.66666666666663, + "r": 559.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.6666666666667, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.6666666666667, + "r": 515.0, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33333333333337, + "r": 385.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.66666666666663, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.6666666666667, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 376.0, + "r": 351.33333333333337, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.6666666666667, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 296.6666666666667, + "r": 351.33333333333337, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333337, + "t": 105.66666666666669, + "r": 190.0, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 204.0, + "r": 168.66666666666663, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 284.0, + "r": 168.66666666666663, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33333333333326, + "t": 105.66666666666669, + "r": 363.6666666666667, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.6666666666667, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666669, + "r": 518.0, + "b": 121.66666666666663, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.23887497045128, - "t": 717.1685676116198, - "r": 523.208764293368, - "b": 764.898293373551, + "l": 240.90093994140625, + "t": 277.41973876953125, + "r": 722.950309753418, + "b": 524.3504486083984, "coord_origin": "TOPLEFT" }, - "confidence": 0.7318570613861084, + "confidence": 0.9790865778923035, "cells": [ { "index": 0, @@ -405,20 +4067,20 @@ "a": 255 }, "rect": { - "r_x0": 89.2388782764286, - "r_y0": 764.898293373551, - "r_x1": 521.9863147998661, - "r_y1": 764.898293373551, - "r_x2": 521.9863147998661, - "r_y2": 744.0929853494625, - "r_x3": 89.2388782764286, - "r_y3": 744.0929853494625, + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -430,43 +4092,22 @@ "a": 255 }, "rect": { - "r_x0": 89.23887497045128, - "r_y0": 739.1977118987292, - "r_x1": 523.208764293368, - "r_y1": 739.1977118987292, - "r_x2": 523.208764293368, - "r_y2": 717.1685676116198, - "r_x3": 89.23887497045128, - "r_y3": 717.1685676116198, + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.2561096985719, - "t": 690.0429592741025, - "r": 522.0347860494834, - "b": 710.0268078458798, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5982133150100708, - "cells": [ + }, { "index": 2, "rgba": { @@ -476,26 +4117,1181 @@ "a": 255 }, "rect": { - "r_x0": 441.2561096985719, - "r_y0": 710.0268078458798, - "r_x1": 522.0347860494834, - "r_y1": 710.0268078458798, - "r_x2": 522.0347860494834, - "r_y2": 690.0429592741025, - "r_x3": 441.2561096985719, - "r_y3": 690.0429592741025, + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.66666666666663, + "r": 694.6666666666666, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.6666666666666, + "r_y0": 485.66666666666663, + "r_x1": 602.0, + "r_y1": 485.66666666666663, + "r_x2": 602.0, + "r_y2": 506.3333333333333, + "r_x3": 694.6666666666666, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33333333333337, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33333333333337, + "r_x1": 610.0, + "r_y1": 454.33333333333337, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 392.0, + "r": 670.3333333333334, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 392.0, + "r_x1": 623.3333333333334, + "r_y1": 392.0, + "r_x2": 623.3333333333334, + "r_y2": 408.0, + "r_x3": 670.3333333333334, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.3333333333334, + "t": 312.0, + "r": 670.3333333333334, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.3333333333334, + "r_y0": 312.0, + "r_x1": 623.3333333333334, + "r_y1": 312.0, + "r_x2": 623.3333333333334, + "r_y2": 328.0, + "r_x3": 670.3333333333334, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.3333333333333, + "t": 485.66666666666663, + "r": 559.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.6666666666667, + "r_y0": 485.66666666666663, + "r_x1": 428.3333333333333, + "r_y1": 485.66666666666663, + "r_x2": 428.3333333333333, + "r_y2": 506.3333333333333, + "r_x3": 559.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.6666666666667, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.6666666666667, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.6666666666667, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.3333333333333, + "r_y1": 408.0, + "r_x2": 442.3333333333333, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.3333333333333, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.3333333333333, + "r_y1": 328.0, + "r_x2": 442.3333333333333, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.6666666666667, + "r": 515.0, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.6666666666667, + "r_x1": 468.0, + "r_y1": 296.6666666666667, + "r_x2": 468.0, + "r_y2": 312.6666666666667, + "r_x3": 515.0, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33333333333337, + "r": 385.6666666666667, + "b": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.6666666666667, + "r_y0": 490.33333333333337, + "r_x1": 274.0, + "r_y1": 490.33333333333337, + "r_x2": 274.0, + "r_y2": 506.3333333333333, + "r_x3": 385.6666666666667, + "r_y3": 506.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.66666666666663, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.66666666666663, + "r_y1": 459.0, + "r_x2": 292.66666666666663, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.6666666666667, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.6666666666667, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 376.0, + "r": 351.33333333333337, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 376.0, + "r_x1": 304.33333333333337, + "r_y1": 376.0, + "r_x2": 304.33333333333337, + "r_y2": 392.0, + "r_x3": 351.33333333333337, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.6666666666667, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.6666666666667, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.6666666666667, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33333333333337, + "t": 296.6666666666667, + "r": 351.33333333333337, + "b": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33333333333337, + "r_y0": 296.6666666666667, + "r_x1": 304.33333333333337, + "r_y1": 296.6666666666667, + "r_x2": 304.33333333333337, + "r_y2": 312.6666666666667, + "r_x3": 351.33333333333337, + "r_y3": 312.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33333333333337, + "t": 105.66666666666669, + "r": 190.0, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 204.0, + "r": 168.66666666666663, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.66666666666663, + "t": 284.0, + "r": 168.66666666666663, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33333333333326, + "t": 105.66666666666669, + "r": 363.6666666666667, + "b": 126.33333333333337, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.6666666666667, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.6666666666667, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333333333333, + "t": 105.66666666666669, + "r": 518.0, + "b": 121.66666666666663, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.3333333333333, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt index d5c2972a..8afe2766 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.doctags.txt @@ -1,3 +1,6 @@ -package + + +Yet another valueSome other valuevalue +
\ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json index fed4d9ec..070a848c 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.json @@ -27,53 +27,149 @@ "file-info": { "filename": "ocr_test_rotated_270.pdf", "filename-prov": null, - "document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14", + "document-hash": "753140dc9b8c39b67c6f6712e2a1de4c364c808ca09d13dd05b79c23192429dc", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304", + "hash": "c8fa256d58940f76c5e0ec6b65548a2e939f867c2c75d0ee27f5f70ff32a44be", "model": "default", "page": 1 } ] }, "main-text": [ + { + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + } + ], + "figures": [], + "tables": [ { "prov": [ { "bbox": [ - 690.2441821046808, - 442.39487414368364, - 709.8255852011977, - 523.076601235155 + 277.4178771972656, + 240.90216064453125, + 524.3541717529297, + 722.9614028930664 ], "page": 1, "span": [ 0, - 7 + 0 ], "__ref_s3_data": null } ], - "text": "package", - "type": "paragraph", + "text": "", + "type": "table", "payload": null, - "name": "Text", - "font": null + "#-cols": 3, + "#-rows": 1, + "data": [ + [ + { + "bbox": [ + 98.0, + 296.6666666666667, + 203.66666666666669, + 344.0 + ], + "spans": [ + [ + 0, + 0 + ] + ], + "text": "Yet another value", + "type": "body", + "col": 0, + "col-header": false, + "col-span": [ + 0, + 1 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 262.3333333333333, + 296.6666666666667, + 365.0, + 344.0 + ], + "spans": [ + [ + 0, + 1 + ] + ], + "text": "Some other value", + "type": "body", + "col": 1, + "col-header": false, + "col-span": [ + 1, + 2 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + }, + { + "bbox": [ + 443.33333333333337, + 312.0, + 490.33333333333337, + 328.0 + ], + "spans": [ + [ + 0, + 2 + ] + ], + "text": "value", + "type": "body", + "col": 2, + "col-header": false, + "col-span": [ + 2, + 3 + ], + "row": 0, + "row-header": false, + "row-span": [ + 0, + 1 + ] + } + ] + ], + "model": null, + "bounding-box": null } ], - "figures": [], - "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 595.201171875, + "height": 792.0, "page": 1, - "width": 841.9216918945312 + "width": 612.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.md index 597acc76..e69de29b 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.md +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.md @@ -1 +0,0 @@ -package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json index 4caa899d..3638ffc4 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_270.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.9216918945312, - "height": 595.201171875 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, + "l": 277.4178771972656, + "t": 69.0385971069336, + "r": 524.3541717529297, + "b": 551.0978393554688, "coord_origin": "TOPLEFT" }, - "confidence": 0.6915205121040344, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 8, - "label": "text", - "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2133 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 97.33333333333333, + "r": 506.3333333333333, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33333333333337, + "t": 112.0, + "r": 475.0, + "b": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.66666666666667, + "r": 408.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.66666666666667, + "r": 328.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 232.66666666666666, + "r": 506.3333333333333, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.66666666666663, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 277.33333333333337, + "r": 312.6666666666667, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33333333333337, + "t": 406.3333333333333, + "r": 506.3333333333333, + "b": 518.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.3333333333333, + "r": 475.0, + "b": 499.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.3333333333333, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.6666666666667, + "r": 392.0, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.3333333333333, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 440.6666666666667, + "r": 312.6666666666667, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 277.4178771972656, + "t": 69.0385971069336, + "r": 524.3541717529297, + "b": 551.0978393554688, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9790208339691162, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 97.33333333333333, + "r": 506.3333333333333, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33333333333337, + "t": 112.0, + "r": 475.0, + "b": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.66666666666667, + "r": 408.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.66666666666667, + "r": 328.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 232.66666666666666, + "r": 506.3333333333333, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.66666666666663, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 277.33333333333337, + "r": 312.6666666666667, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33333333333337, + "t": 406.3333333333333, + "r": 506.3333333333333, + "b": 518.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.3333333333333, + "r": 475.0, + "b": 499.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.3333333333333, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.6666666666667, + "r": 392.0, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.3333333333333, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 440.6666666666667, + "r": 312.6666666666667, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33333333333337, + "t": 312.0, + "r": 490.33333333333337, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.3333333333333, + "t": 296.6666666666667, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.6666666666667, + "r": 203.66666666666669, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2676,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, + "l": 277.4178771972656, + "t": 69.0385971069336, + "r": 524.3541717529297, + "b": 551.0978393554688, "coord_origin": "TOPLEFT" }, - "confidence": 0.6915205121040344, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -286,20 +2700,20 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -311,43 +2725,22 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,92 +2750,1085 @@ "a": 255 }, "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 97.33333333333333, + "r": 506.3333333333333, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33333333333337, + "t": 112.0, + "r": 475.0, + "b": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.66666666666667, + "r": 408.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.66666666666667, + "r": 328.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 232.66666666666666, + "r": 506.3333333333333, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.66666666666663, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 277.33333333333337, + "r": 312.6666666666667, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33333333333337, + "t": 406.3333333333333, + "r": 506.3333333333333, + "b": 518.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.3333333333333, + "r": 475.0, + "b": 499.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.3333333333333, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.6666666666667, + "r": 392.0, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.3333333333333, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 440.6666666666667, + "r": 312.6666666666667, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33333333333337, + "t": 312.0, + "r": 490.33333333333337, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.3333333333333, + "t": 296.6666666666667, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.6666666666667, + "r": 203.66666666666669, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.2441821046808, - "t": 72.124570639845, - "r": 709.8255852011977, - "b": 152.80629773131633, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 690.2441821046808, - "r_y0": 152.80629773131633, - "r_x1": 709.8255852011977, - "r_y1": 152.80629773131633, - "r_x2": 709.8255852011977, - "r_y2": 72.124570639845, - "r_x3": 690.2441821046808, - "r_y3": 72.124570639845, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.168585936602, - "t": 70.90211682372312, - "r": 764.8982839673505, - "b": 504.8720061466397, + "l": 277.4178771972656, + "t": 69.0385971069336, + "r": 524.3541717529297, + "b": 551.0978393554688, "coord_origin": "TOPLEFT" }, - "confidence": 0.6915205121040344, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -453,20 +3839,20 @@ "a": 255 }, "rect": { - "r_x0": 744.0930045534915, - "r_y0": 504.87200373583954, - "r_x1": 764.8982839673505, - "r_y1": 504.87200373583954, - "r_x2": 764.8982839673505, - "r_y2": 73.34702001188118, - "r_x3": 744.0930045534915, - "r_y3": 73.34702001188118, + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -478,28 +3864,1095 @@ "a": 255 }, "rect": { - "r_x0": 717.168585936602, - "r_y0": 504.8720061466397, - "r_x1": 737.9738558137178, - "r_y1": 504.8720061466397, - "r_x2": 737.9738558137178, - "r_y2": 70.90211682372312, - "r_x3": 717.168585936602, - "r_y3": 70.90211682372312, + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 97.33333333333333, + "r": 506.3333333333333, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 97.33333333333333, + "r_x1": 485.66666666666663, + "r_y1": 190.0, + "r_x2": 506.3333333333333, + "r_y2": 190.0, + "r_x3": 506.3333333333333, + "r_y3": 97.33333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33333333333337, + "t": 112.0, + "r": 475.0, + "b": 182.33333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33333333333337, + "r_y0": 112.0, + "r_x1": 454.33333333333337, + "r_y1": 182.33333333333334, + "r_x2": 475.0, + "r_y2": 182.33333333333334, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.66666666666667, + "r": 408.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.66666666666667, + "r_x1": 392.0, + "r_y1": 168.66666666666666, + "r_x2": 408.0, + "r_y2": 168.66666666666666, + "r_x3": 408.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.66666666666667, + "r": 328.0, + "b": 168.66666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.66666666666667, + "r_x1": 312.0, + "r_y1": 168.66666666666666, + "r_x2": 328.0, + "r_y2": 168.66666666666666, + "r_x3": 328.0, + "r_y3": 121.66666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.66666666666663, + "t": 232.66666666666666, + "r": 506.3333333333333, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.66666666666663, + "r_y0": 232.66666666666666, + "r_x1": 485.66666666666663, + "r_y1": 364.0, + "r_x2": 506.3333333333333, + "r_y2": 364.0, + "r_x3": 506.3333333333333, + "r_y3": 232.66666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.66666666666663, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.66666666666663, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.66666666666663, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.6666666666667, + "r_x2": 424.0, + "r_y2": 349.6666666666667, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.3333333333333, + "r_x2": 392.0, + "r_y2": 324.3333333333333, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.6666666666667, + "r_x2": 344.0, + "r_y2": 349.6666666666667, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 277.33333333333337, + "r": 312.6666666666667, + "b": 324.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 277.33333333333337, + "r_x1": 296.6666666666667, + "r_y1": 324.3333333333333, + "r_x2": 312.6666666666667, + "r_y2": 324.3333333333333, + "r_x3": 312.6666666666667, + "r_y3": 277.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33333333333337, + "t": 406.3333333333333, + "r": 506.3333333333333, + "b": 518.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33333333333337, + "r_y0": 406.3333333333333, + "r_x1": 490.33333333333337, + "r_y1": 518.3333333333333, + "r_x2": 506.3333333333333, + "r_y2": 518.3333333333333, + "r_x3": 506.3333333333333, + "r_y3": 406.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.3333333333333, + "r": 475.0, + "b": 499.3333333333333, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.3333333333333, + "r_x1": 459.0, + "r_y1": 499.3333333333333, + "r_x2": 475.0, + "r_y2": 499.3333333333333, + "r_x3": 475.0, + "r_y3": 429.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.3333333333333, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.3333333333333, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.6666666666667, + "r": 392.0, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.6666666666667, + "r_x1": 376.0, + "r_y1": 487.6666666666667, + "r_x2": 392.0, + "r_y2": 487.6666666666667, + "r_x3": 392.0, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.3333333333333, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.3333333333333, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.3333333333333, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.6666666666667, + "t": 440.6666666666667, + "r": 312.6666666666667, + "b": 487.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.6666666666667, + "r_y0": 440.6666666666667, + "r_x1": 296.6666666666667, + "r_y1": 487.6666666666667, + "r_x2": 312.6666666666667, + "r_y2": 487.6666666666667, + "r_x3": 312.6666666666667, + "r_y3": 440.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33333333333337, + "t": 312.0, + "r": 490.33333333333337, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.3333333333333, + "t": 296.6666666666667, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.6666666666667, + "r": 203.66666666666669, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt index 0b7a3a14..a9e1d3bd 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.doctags.txt @@ -1,3 +1,5 @@ -package + + +
\ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json index 5a622c92..ad27e476 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.json @@ -27,53 +27,62 @@ "file-info": { "filename": "ocr_test_rotated_90.pdf", "filename-prov": null, - "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", + "document-hash": "418ae4425f514f002bd4223ea3003c17f319cbeafd67801732d58f2bedb3bd91", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [ { - "hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", + "hash": "36315c08dc861ecde4be6179d2f155da0519b93e0311c290f8db164f593d36d8", "model": "default", "page": 1 } ] }, "main-text": [ + { + "name": "Table", + "type": "table", + "$ref": "#/tables/0" + } + ], + "figures": [], + "tables": [ { "prov": [ { "bbox": [ - 131.21306574279092, - 74.12495603322407, - 152.19606490864376, - 154.19400205373182 + 87.64582824707031, + 69.0385971069336, + 334.5821228027344, + 551.0978393554688 ], "page": 1, "span": [ 0, - 7 + 0 ], "__ref_s3_data": null } ], - "text": "package", - "type": "paragraph", + "text": "", + "type": "table", "payload": null, - "name": "Text", - "font": null + "#-cols": 0, + "#-rows": 0, + "data": [], + "model": null, + "bounding-box": null } ], - "figures": [], - "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [ { - "height": 595.201171875, + "height": 792.0, "page": 1, - "width": 841.9216918945312 + "width": 612.0 } ], "page-footers": [], diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md index 597acc76..e69de29b 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.md @@ -1 +0,0 @@ -package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json index e6bcce8c..c26e2ec8 100644 --- a/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json +++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test_rotated_90.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.9216918945312, - "height": 595.201171875 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.201171875, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.201171875, - "r_y2": 841.9216918945312, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.9216918945312, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.9216918945312, - "r": 595.201171875, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, + "l": 87.64582824707031, + "t": 240.90216064453125, + "r": 334.5821228027344, + "b": 722.9614028930664, "coord_origin": "TOPLEFT" }, - "confidence": 0.6016772389411926, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5234212875366211, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2075 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 602.0, + "r": 126.33333333333334, + "b": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.66666666666669, + "b": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.3333333333334, + "r": 220.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.3333333333334, + "r": 300.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 428.0, + "r": 126.33333333333334, + "b": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 442.3333333333333, + "r": 204.33333333333334, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.3333333333333, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 468.0, + "r": 315.33333333333337, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 274.0, + "r": 121.66666666666666, + "b": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.66666666666663, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 278.0, + "r": 204.33333333333334, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33333333333337, + "r": 236.0, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 304.33333333333337, + "r": 315.33333333333337, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 87.64582824707031, + "t": 240.90216064453125, + "r": 334.5821228027344, + "b": 722.9614028930664, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.9790208339691162, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 602.0, + "r": 126.33333333333334, + "b": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.66666666666669, + "b": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.3333333333334, + "r": 220.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.3333333333334, + "r": 300.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 428.0, + "r": 126.33333333333334, + "b": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 442.3333333333333, + "r": 204.33333333333334, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.3333333333333, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 468.0, + "r": 315.33333333333337, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 274.0, + "r": 121.66666666666666, + "b": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.66666666666663, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 278.0, + "r": 204.33333333333334, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33333333333337, + "r": 236.0, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 304.33333333333337, + "r": 315.33333333333337, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2618,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, + "l": 87.64582824707031, + "t": 240.90216064453125, + "r": 334.5821228027344, + "b": 722.9614028930664, "coord_origin": "TOPLEFT" }, - "confidence": 0.6016772389411926, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -286,20 +2642,20 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -311,43 +2667,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5234212875366211, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,92 +2692,1027 @@ "a": 255 }, "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 602.0, + "r": 126.33333333333334, + "b": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.66666666666669, + "b": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.3333333333334, + "r": 220.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.3333333333334, + "r": 300.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 428.0, + "r": 126.33333333333334, + "b": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 442.3333333333333, + "r": 204.33333333333334, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.3333333333333, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 468.0, + "r": 315.33333333333337, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 274.0, + "r": 121.66666666666666, + "b": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.66666666666663, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 278.0, + "r": 204.33333333333334, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33333333333337, + "r": 236.0, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 304.33333333333337, + "r": 315.33333333333337, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } ], "body": [ { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21306574279092, - "t": 441.0071698212682, - "r": 152.19606490864376, - "b": 521.0762158417759, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.5234212875366211, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 131.21306574279092, - "r_y0": 521.0762158417759, - "r_x1": 152.19606490864376, - "r_y1": 521.0762158417759, - "r_x2": 152.19606490864376, - "r_y2": 441.0071698212682, - "r_x3": 131.21306574279092, - "r_y3": 441.0071698212682, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.10171545548258, - "t": 89.1266754140729, - "r": 126.08064862014129, - "b": 523.3236155182395, + "l": 87.64582824707031, + "t": 240.90216064453125, + "r": 334.5821228027344, + "b": 722.9614028930664, "coord_origin": "TOPLEFT" }, - "confidence": 0.6016772389411926, + "confidence": 0.9790208339691162, "cells": [ { "index": 0, @@ -453,20 +3723,20 @@ "a": 255 }, "rect": { - "r_x0": 77.10171545548258, - "r_y0": 520.7638571913312, - "r_x1": 96.68315797053792, - "r_y1": 520.7638571913312, - "r_x2": 96.68315797053792, - "r_y2": 89.2388734673729, - "r_x3": 77.10171545548258, - "r_y3": 89.2388734673729, + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -478,28 +3748,1037 @@ "a": 255 }, "rect": { - "r_x0": 100.64168123325977, - "r_y0": 523.3236155182395, - "r_x1": 126.08064862014129, - "r_y1": 523.3236155182395, - "r_x2": 126.08064862014129, - "r_y2": 89.1266754140729, - "r_x3": 100.64168123325977, - "r_y3": 89.1266754140729, + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 602.0, + "r": 126.33333333333334, + "b": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 694.6666666666666, + "r_x1": 126.33333333333334, + "r_y1": 602.0, + "r_x2": 105.66666666666666, + "r_y2": 602.0, + "r_x3": 105.66666666666666, + "r_y3": 694.6666666666666, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.66666666666669, + "b": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.66666666666669, + "r_y0": 680.3333333333334, + "r_x1": 157.66666666666669, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.3333333333334, + "r": 220.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.3333333333334, + "r_x1": 220.0, + "r_y1": 623.3333333333334, + "r_x2": 204.0, + "r_y2": 623.3333333333334, + "r_x3": 204.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.3333333333334, + "r": 300.0, + "b": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.3333333333334, + "r_x1": 300.0, + "r_y1": 623.3333333333334, + "r_x2": 284.0, + "r_y2": 623.3333333333334, + "r_x3": 284.0, + "r_y3": 670.3333333333334, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 428.0, + "r": 126.33333333333334, + "b": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33333333333334, + "r_y0": 559.6666666666667, + "r_x1": 126.33333333333334, + "r_y1": 428.0, + "r_x2": 105.66666666666666, + "r_y2": 428.0, + "r_x3": 105.66666666666666, + "r_y3": 559.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.6666666666667, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 442.3333333333333, + "r": 204.33333333333334, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 545.0, + "r_x1": 204.33333333333334, + "r_y1": 442.3333333333333, + "r_x2": 188.33333333333331, + "r_y2": 442.3333333333333, + "r_x3": 188.33333333333331, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.3333333333333, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.3333333333333, + "r_x2": 268.0, + "r_y2": 442.3333333333333, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 468.0, + "r": 315.33333333333337, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 515.0, + "r_x1": 315.33333333333337, + "r_y1": 468.0, + "r_x2": 299.66666666666663, + "r_y2": 468.0, + "r_x3": 299.66666666666663, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.66666666666666, + "t": 274.0, + "r": 121.66666666666666, + "b": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.66666666666666, + "r_y0": 385.6666666666667, + "r_x1": 121.66666666666666, + "r_y1": 274.0, + "r_x2": 105.66666666666666, + "r_y2": 274.0, + "r_x3": 105.66666666666666, + "r_y3": 385.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.66666666666663, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.66666666666663, + "r_x2": 137.0, + "r_y2": 292.66666666666663, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33333333333331, + "t": 278.0, + "r": 204.33333333333334, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33333333333334, + "r_y0": 384.0, + "r_x1": 204.33333333333334, + "r_y1": 278.0, + "r_x2": 188.33333333333331, + "r_y2": 278.0, + "r_x3": 188.33333333333331, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33333333333337, + "r": 236.0, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33333333333337, + "r_x1": 236.0, + "r_y1": 304.33333333333337, + "r_x2": 220.0, + "r_y2": 304.33333333333337, + "r_x3": 220.0, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.6666666666667, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.6666666666667, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.66666666666663, + "t": 304.33333333333337, + "r": 315.33333333333337, + "b": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33333333333337, + "r_y0": 351.33333333333337, + "r_x1": 315.33333333333337, + "r_y1": 304.33333333333337, + "r_x2": 299.66666666666663, + "r_y2": 304.33333333333337, + "r_x3": 299.66666666666663, + "r_y3": 351.33333333333337, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt index c210e4dd..73f13805 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.doctags.txt @@ -1,2 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package +Vertically mergedOther merged columnYet another columnvalueSome other valueYet another valuevalueSome other valueYet another value \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json index 22a1c54d..f0ed675a 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json @@ -4,7 +4,7 @@ "name": "ocr_test", "origin": { "mimetype": "application/pdf", - "binary_hash": 14853448746796404529, + "binary_hash": 14846044078209721391, "filename": "ocr_test.pdf" }, "furniture": { @@ -18,7 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -26,44 +26,402 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 69.68, - "t": 764.92, - "r": 504.87, - "b": 689.01, + "l": 69.05, + "t": 524.35, + "r": 551.1, + "b": 277.42, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 94 + 0 ] } ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 3, + "num_cols": 3, + "grid": [ + [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 595.2, - "height": 841.92 + "width": 792.0, + "height": 612.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.md b/tests/data_scanned/groundtruth/docling_v2/ocr_test.md index 42896546..e3d7c0b8 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.md +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.md @@ -1 +1,4 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file +| Vertically merged | Other merged column | Yet another column | +|---------------------|-----------------------|----------------------| +| value | Some other value | Yet another value | +| value | Some other value | Yet another value | \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json index 093688be..4470bece 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.2, - "height": 841.92 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 69.05, + "t": 87.65, + "r": 551.1, + "b": 334.58, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.979, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -191,20 +516,20 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -216,29 +541,2247 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33, + "b": 157.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.67, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.33, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.67, + "r": 324.33, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.33, + "t": 137.0, + "r": 499.33, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.67, + "t": 220.0, + "r": 487.67, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.67, + "t": 299.67, + "r": 487.67, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 69.05, + "t": 87.65, + "r": 551.1, + "b": 334.58, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.979, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33, + "b": 157.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.67, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.33, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.67, + "r": 324.33, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.33, + "t": 137.0, + "r": 499.33, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.67, + "t": 220.0, + "r": 487.67, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.67, + "t": 299.67, + "r": 487.67, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -247,20 +2790,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 69.05, + "t": 87.65, + "r": 551.1, + "b": 334.58, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.979, "cells": [ { "index": 0, @@ -271,20 +2814,20 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -296,20 +2839,20 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -321,44 +2864,1199 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33, + "b": 157.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.67, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.33, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.67, + "r": 324.33, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.33, + "t": 137.0, + "r": 499.33, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.67, + "t": 220.0, + "r": 487.67, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.67, + "t": 299.67, + "r": 487.67, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 69.68, - "t": 77.0, - "r": 504.87, - "b": 152.91, + "l": 69.05, + "t": 87.65, + "r": 551.1, + "b": 334.58, "coord_origin": "TOPLEFT" }, - "confidence": 0.972, + "confidence": 0.979, "cells": [ { "index": 0, @@ -369,20 +4067,20 @@ "a": 255 }, "rect": { - "r_x0": 73.35, - "r_y0": 98.0, - "r_x1": 503.65, - "r_y1": 98.0, - "r_x2": 503.65, - "r_y2": 77.0, - "r_x3": 73.35, - "r_y3": 77.0, + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -394,20 +4092,20 @@ "a": 255 }, "rect": { - "r_x0": 69.68, - "r_y0": 124.83, - "r_x1": 504.87, - "r_y1": 124.83, - "r_x2": 504.87, - "r_y2": 104.0, - "r_x3": 69.68, - "r_y3": 104.0, + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -419,26 +4117,1181 @@ "a": 255 }, "rect": { - "r_x0": 71.84, - "r_y0": 152.91, - "r_x1": 153.09, - "r_y1": 152.91, - "r_x2": 153.09, - "r_y2": 129.8, - "r_x3": 71.84, - "r_y3": 129.8, + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 97.33, + "r_y0": 126.33, + "r_x1": 190.0, + "r_y1": 126.33, + "r_x2": 190.0, + "r_y2": 105.67, + "r_x3": 97.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 112.0, + "t": 137.0, + "r": 182.33, + "b": 157.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 112.0, + "r_y0": 157.67, + "r_x1": 182.33, + "r_y1": 157.67, + "r_x2": 182.33, + "r_y2": 137.0, + "r_x3": 112.0, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 220.0, + "r_x1": 168.67, + "r_y1": 220.0, + "r_x2": 168.67, + "r_y2": 204.33, + "r_x3": 121.67, + "r_y3": 204.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 300.0, + "r_x1": 168.67, + "r_y1": 300.0, + "r_x2": 168.67, + "r_y2": 284.0, + "r_x3": 121.67, + "r_y3": 284.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 232.67, + "r_y0": 126.33, + "r_x1": 364.0, + "r_y1": 126.33, + "r_x2": 364.0, + "r_y2": 105.67, + "r_x3": 232.67, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 265.67, + "t": 137.0, + "r": 336.0, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 265.67, + "r_y0": 153.0, + "r_x1": 336.0, + "r_y1": 153.0, + "r_x2": 336.0, + "r_y2": 137.0, + "r_x3": 265.67, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 204.33, + "r_x1": 349.67, + "r_y1": 204.33, + "r_x2": 349.67, + "r_y2": 188.33, + "r_x3": 247.0, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 277.0, + "t": 220.0, + "r": 324.33, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 236.0, + "r_x1": 324.33, + "r_y1": 236.0, + "r_x2": 324.33, + "r_y2": 220.0, + "r_x3": 277.0, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 247.0, + "r_y0": 284.0, + "r_x1": 349.67, + "r_y1": 284.0, + "r_x2": 349.67, + "r_y2": 268.0, + "r_x3": 247.0, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 277.0, + "t": 299.67, + "r": 324.33, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 277.0, + "r_y0": 315.33, + "r_x1": 324.33, + "r_y1": 315.33, + "r_x2": 324.33, + "r_y2": 299.67, + "r_x3": 277.0, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 406.33, + "r_y0": 121.67, + "r_x1": 518.33, + "r_y1": 121.67, + "r_x2": 518.33, + "r_y2": 105.67, + "r_x3": 406.33, + "r_y3": 105.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 429.33, + "t": 137.0, + "r": 499.33, + "b": 153.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 429.33, + "r_y0": 153.0, + "r_x1": 499.33, + "r_y1": 153.0, + "r_x2": 499.33, + "r_y2": 137.0, + "r_x3": 429.33, + "r_y3": 137.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 204.33, + "r_x1": 514.0, + "r_y1": 204.33, + "r_x2": 514.0, + "r_y2": 188.33, + "r_x3": 408.33, + "r_y3": 188.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 440.67, + "t": 220.0, + "r": 487.67, + "b": 236.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 236.0, + "r_x1": 487.67, + "r_y1": 236.0, + "r_x2": 487.67, + "r_y2": 220.0, + "r_x3": 440.67, + "r_y3": 220.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.33, + "r_y0": 284.0, + "r_x1": 514.0, + "r_y1": 284.0, + "r_x2": 514.0, + "r_y2": 268.0, + "r_x3": 408.33, + "r_y3": 268.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 440.67, + "t": 299.67, + "r": 487.67, + "b": 315.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 440.67, + "r_y0": 315.33, + "r_x1": 487.67, + "r_y1": 315.33, + "r_x2": 487.67, + "r_y2": 299.67, + "r_x3": 440.67, + "r_y3": 299.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.33, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.67, + "t": 105.67, + "r": 364.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.33, + "r": 349.67, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.33, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.33, + "r": 514.0, + "b": 204.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt index 405aa96e..1d7f0742 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.doctags.txt @@ -1,3 +1,2 @@ -package -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained +Vertically mergedOther merged columnYet another columnvalueSome other valueYet another valuevalueSome other valueYet another value \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json index c282ed1d..76cc6098 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_180", "origin": { "mimetype": "application/pdf", - "binary_hash": 2530576989861832966, + "binary_hash": 16151733167151414937, "filename": "ocr_test_rotated_180.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,402 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 441.26, - "t": 151.88, - "r": 522.03, - "b": 131.89, + "l": 240.9, + "t": 334.58, + "r": 722.95, + "b": 87.65, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" - }, - { - "self_ref": "#/texts/1", - "parent": { - "$ref": "#/body" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.0, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33, + "t": 105.67, + "r": 363.67, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.67, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.0, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 3, + "num_cols": 3, + "grid": [ + [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33, + "t": 105.67, + "r": 363.67, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.0, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 121.67, + "t": 204.0, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.67, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] }, - "children": [], - "content_layer": "body", - "label": "text", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 89.24, - "t": 124.75, - "r": 523.21, - "b": 77.02, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 595.2, - "height": 841.92 + "width": 792.0, + "height": 612.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.md b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.md index f5d50b5c..e3d7c0b8 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.md +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.md @@ -1,3 +1,4 @@ -package - -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained \ No newline at end of file +| Vertically merged | Other merged column | Yet another column | +|---------------------|-----------------------|----------------------| +| value | Some other value | Yet another value | +| value | Some other value | Yet another value | \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json index 3001a46f..6d1b52fb 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 595.2, - "height": 841.92 + "width": 792.0, + "height": 612.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 240.9, + "t": 277.42, + "r": 722.95, + "b": 524.35, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.979, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2247 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.67, + "r": 694.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.33, + "t": 392.0, + "r": 670.33, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.33, + "t": 312.0, + "r": 670.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.33, + "t": 485.67, + "r": 559.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.67, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.33, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.33, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.67, + "r": 515.0, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33, + "r": 385.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.67, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.67, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33, + "t": 376.0, + "r": 351.33, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33, + "t": 296.67, + "r": 351.33, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 240.9, + "t": 277.42, + "r": 722.95, + "b": 524.35, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.979, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.67, + "r": 694.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.33, + "t": 392.0, + "r": 670.33, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.33, + "t": 312.0, + "r": 670.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.33, + "t": 485.67, + "r": 559.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.67, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.33, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.33, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.67, + "r": 515.0, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33, + "r": 385.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.67, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.67, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33, + "t": 376.0, + "r": 351.33, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33, + "t": 296.67, + "r": 351.33, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.0, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33, + "t": 105.67, + "r": 363.67, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.67, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.0, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2790,20 @@ "assembled": { "elements": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 240.9, + "t": 277.42, + "r": 722.95, + "b": 524.35, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.979, "cells": [ { "index": 0, @@ -286,20 +2814,20 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -311,43 +2839,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,44 +2864,1199 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.67, + "r": 694.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.33, + "t": 392.0, + "r": 670.33, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.33, + "t": 312.0, + "r": 670.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.33, + "t": 485.67, + "r": 559.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.67, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.33, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.33, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.67, + "r": 515.0, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33, + "r": 385.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.67, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.67, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33, + "t": 376.0, + "r": 351.33, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33, + "t": 296.67, + "r": 351.33, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.0, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33, + "t": 105.67, + "r": 363.67, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.67, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.0, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "text", + "label": "table", "bbox": { - "l": 89.24, - "t": 717.17, - "r": 523.21, - "b": 764.9, + "l": 240.9, + "t": 277.42, + "r": 722.95, + "b": 524.35, "coord_origin": "TOPLEFT" }, - "confidence": 0.732, + "confidence": 0.979, "cells": [ { "index": 0, @@ -405,20 +4067,20 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 764.9, - "r_x1": 521.99, - "r_y1": 764.9, - "r_x2": 521.99, - "r_y2": 744.09, - "r_x3": 89.24, - "r_y3": 744.09, + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -430,43 +4092,22 @@ "a": 255 }, "rect": { - "r_x0": 89.24, - "r_y0": 739.2, - "r_x1": 523.21, - "r_y1": 739.2, - "r_x2": 523.21, - "r_y2": 717.17, - "r_x3": 89.24, - "r_y3": 717.17, + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 2, - "page_no": 0, - "cluster": { - "id": 2, - "label": "text", - "bbox": { - "l": 441.26, - "t": 690.04, - "r": 522.03, - "b": 710.03, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.598, - "cells": [ + }, { "index": 2, "rgba": { @@ -476,26 +4117,1181 @@ "a": 255 }, "rect": { - "r_x0": 441.26, - "r_y0": 710.03, - "r_x1": 522.03, - "r_y1": 710.03, - "r_x2": 522.03, - "r_y2": 690.04, - "r_x3": 441.26, - "r_y3": 690.04, + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 602.0, + "t": 485.67, + "r": 694.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 694.67, + "r_y0": 485.67, + "r_x1": 602.0, + "r_y1": 485.67, + "r_x2": 602.0, + "r_y2": 506.33, + "r_x3": 694.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 610.0, + "t": 454.33, + "r": 680.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 680.0, + "r_y0": 454.33, + "r_x1": 610.0, + "r_y1": 454.33, + "r_x2": 610.0, + "r_y2": 475.0, + "r_x3": 680.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 623.33, + "t": 392.0, + "r": 670.33, + "b": 408.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 392.0, + "r_x1": 623.33, + "r_y1": 392.0, + "r_x2": 623.33, + "r_y2": 408.0, + "r_x3": 670.33, + "r_y3": 408.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 623.33, + "t": 312.0, + "r": 670.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 670.33, + "r_y0": 312.0, + "r_x1": 623.33, + "r_y1": 312.0, + "r_x2": 623.33, + "r_y2": 328.0, + "r_x3": 670.33, + "r_y3": 328.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 428.33, + "t": 485.67, + "r": 559.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 559.67, + "r_y0": 485.67, + "r_x1": 428.33, + "r_y1": 485.67, + "r_x2": 428.33, + "r_y2": 506.33, + "r_x3": 559.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 456.0, + "t": 459.0, + "r": 526.67, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 526.67, + "r_y0": 459.0, + "r_x1": 456.0, + "r_y1": 459.0, + "r_x2": 456.0, + "r_y2": 475.0, + "r_x3": 526.67, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 442.33, + "t": 408.0, + "r": 545.0, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 408.0, + "r_x1": 442.33, + "r_y1": 408.0, + "r_x2": 442.33, + "r_y2": 424.0, + "r_x3": 545.0, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 468.0, + "t": 376.0, + "r": 515.0, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 376.0, + "r_x1": 468.0, + "r_y1": 376.0, + "r_x2": 468.0, + "r_y2": 392.0, + "r_x3": 515.0, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 442.33, + "t": 328.0, + "r": 545.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 545.0, + "r_y0": 328.0, + "r_x1": 442.33, + "r_y1": 328.0, + "r_x2": 442.33, + "r_y2": 344.0, + "r_x3": 545.0, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 468.0, + "t": 296.67, + "r": 515.0, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 515.0, + "r_y0": 296.67, + "r_x1": 468.0, + "r_y1": 296.67, + "r_x2": 468.0, + "r_y2": 312.67, + "r_x3": 515.0, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 274.0, + "t": 490.33, + "r": 385.67, + "b": 506.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 385.67, + "r_y0": 490.33, + "r_x1": 274.0, + "r_y1": 490.33, + "r_x2": 274.0, + "r_y2": 506.33, + "r_x3": 385.67, + "r_y3": 506.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 292.67, + "t": 459.0, + "r": 363.0, + "b": 475.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 363.0, + "r_y0": 459.0, + "r_x1": 292.67, + "r_y1": 459.0, + "r_x2": 292.67, + "r_y2": 475.0, + "r_x3": 363.0, + "r_y3": 475.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 278.0, + "t": 408.0, + "r": 383.67, + "b": 424.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 408.0, + "r_x1": 278.0, + "r_y1": 408.0, + "r_x2": 278.0, + "r_y2": 424.0, + "r_x3": 383.67, + "r_y3": 424.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 304.33, + "t": 376.0, + "r": 351.33, + "b": 392.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 376.0, + "r_x1": 304.33, + "r_y1": 376.0, + "r_x2": 304.33, + "r_y2": 392.0, + "r_x3": 351.33, + "r_y3": 392.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 278.0, + "t": 328.0, + "r": 383.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 383.67, + "r_y0": 328.0, + "r_x1": 278.0, + "r_y1": 328.0, + "r_x2": 278.0, + "r_y2": 344.0, + "r_x3": 383.67, + "r_y3": 344.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 304.33, + "t": 296.67, + "r": 351.33, + "b": 312.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 351.33, + "r_y0": 296.67, + "r_x1": 304.33, + "r_y1": 296.67, + "r_x2": 304.33, + "r_y2": 312.67, + "r_x3": 351.33, + "r_y3": 312.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 3, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 97.33, + "t": 105.67, + "r": 190.0, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Vertically merged", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 204.0, + "r": 168.67, + "b": 220.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 121.67, + "t": 284.0, + "r": 168.67, + "b": 300.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.33, + "t": 105.67, + "r": 363.67, + "b": 126.33, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Other merged column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 188.0, + "r": 349.67, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.0, + "t": 268.0, + "r": 349.67, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.33, + "t": 105.67, + "r": 518.0, + "b": 121.67, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another column", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 188.0, + "r": 514.0, + "b": 204.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.33, + "t": 268.0, + "r": 514.0, + "b": 284.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "headers": [] diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt index 70ee51c4..d693f790 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.doctags.txt @@ -1,3 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained -package +Yet another valueSome other valuevalue \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json index e4bae43c..9bc6ea25 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_270", "origin": { "mimetype": "application/pdf", - "binary_hash": 10890858393843077593, + "binary_hash": 8365439800722100027, "filename": "ocr_test_rotated_270.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,170 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "furniture", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 717.17, - "t": 524.3, - "r": 764.9, - "b": 90.33, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "self_ref": "#/texts/1", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 690.24, - "t": 523.08, - "r": 709.83, - "b": 442.39, + "l": 277.42, + "t": 722.96, + "r": 524.35, + "b": 240.9, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 443.33, + "t": 312.0, + "r": 490.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.33, + "t": 296.67, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.67, + "r": 203.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 1, + "num_cols": 3, + "grid": [ + [ + { + "bbox": { + "l": 98.0, + "t": 296.67, + "r": 203.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.33, + "t": 296.67, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.33, + "t": 312.0, + "r": 490.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 841.92, - "height": 595.2 + "width": 612.0, + "height": 792.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.md b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.md index 597acc76..e69de29b 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.md +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.md @@ -1 +0,0 @@ -package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json index c4a13a3f..d77269ce 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.92, - "height": 595.2 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 277.42, + "t": 69.04, + "r": 524.35, + "b": 551.1, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.979, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2133 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.67, + "t": 97.33, + "r": 506.33, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33, + "t": 112.0, + "r": 475.0, + "b": 182.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.67, + "r": 408.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.67, + "r": 328.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.67, + "t": 232.67, + "r": 506.33, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.67, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.67, + "t": 277.33, + "r": 312.67, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33, + "t": 406.33, + "r": 506.33, + "b": 518.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.33, + "r": 475.0, + "b": 499.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.33, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.67, + "r": 392.0, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.33, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.67, + "t": 440.67, + "r": 312.67, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 277.42, + "t": 69.04, + "r": 524.35, + "b": 551.1, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.979, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.67, + "t": 97.33, + "r": 506.33, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33, + "t": 112.0, + "r": 475.0, + "b": 182.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.67, + "r": 408.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.67, + "r": 328.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.67, + "t": 232.67, + "r": 506.33, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.67, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.67, + "t": 277.33, + "r": 312.67, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33, + "t": 406.33, + "r": 506.33, + "b": 518.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.33, + "r": 475.0, + "b": 499.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.33, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.67, + "r": 392.0, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.33, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.67, + "t": 440.67, + "r": 312.67, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33, + "t": 312.0, + "r": 490.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.33, + "t": 296.67, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.67, + "r": 203.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2676,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 277.42, + "t": 69.04, + "r": 524.35, + "b": 551.1, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.979, "cells": [ { "index": 0, @@ -286,20 +2700,20 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -311,43 +2725,22 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,92 +2750,1085 @@ "a": 255 }, "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.67, + "t": 97.33, + "r": 506.33, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33, + "t": 112.0, + "r": 475.0, + "b": 182.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.67, + "r": 408.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.67, + "r": 328.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.67, + "t": 232.67, + "r": 506.33, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.67, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.67, + "t": 277.33, + "r": 312.67, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33, + "t": 406.33, + "r": 506.33, + "b": 518.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.33, + "r": 475.0, + "b": 499.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.33, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.67, + "r": 392.0, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.33, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.67, + "t": 440.67, + "r": 312.67, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33, + "t": 312.0, + "r": 490.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.33, + "t": 296.67, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.67, + "r": 203.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } ], "body": [ { - "label": "text", - "id": 8, - "page_no": 0, - "cluster": { - "id": 8, - "label": "text", - "bbox": { - "l": 690.24, - "t": 72.12, - "r": 709.83, - "b": 152.81, - "coord_origin": "TOPLEFT" - }, - "confidence": 1.0, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 690.24, - "r_y0": 152.81, - "r_x1": 709.83, - "r_y1": 152.81, - "r_x2": 709.83, - "r_y2": 72.12, - "r_x3": 690.24, - "r_y3": 72.12, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 717.17, - "t": 70.9, - "r": 764.9, - "b": 504.87, + "l": 277.42, + "t": 69.04, + "r": 524.35, + "b": 551.1, "coord_origin": "TOPLEFT" }, - "confidence": 0.692, + "confidence": 0.979, "cells": [ { "index": 0, @@ -453,20 +3839,20 @@ "a": 255 }, "rect": { - "r_x0": 744.09, - "r_y0": 504.87, - "r_x1": 764.9, - "r_y1": 504.87, - "r_x2": 764.9, - "r_y2": 73.35, - "r_x3": 744.09, - "r_y3": 73.35, + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 89.0, "from_ocr": true }, { @@ -478,28 +3864,1095 @@ "a": 255 }, "rect": { - "r_x0": 717.17, - "r_y0": 504.87, - "r_x1": 737.97, - "r_y1": 504.87, - "r_x2": 737.97, - "r_y2": 70.9, - "r_x3": 717.17, - "r_y3": 70.9, + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 485.67, + "t": 97.33, + "r": 506.33, + "b": 190.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 97.33, + "r_x1": 485.67, + "r_y1": 190.0, + "r_x2": 506.33, + "r_y2": 190.0, + "r_x3": 506.33, + "r_y3": 97.33, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 454.33, + "t": 112.0, + "r": 475.0, + "b": 182.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 454.33, + "r_y0": 112.0, + "r_x1": 454.33, + "r_y1": 182.33, + "r_x2": 475.0, + "r_y2": 182.33, + "r_x3": 475.0, + "r_y3": 112.0, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 392.0, + "t": 121.67, + "r": 408.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 392.0, + "r_y0": 121.67, + "r_x1": 392.0, + "r_y1": 168.67, + "r_x2": 408.0, + "r_y2": 168.67, + "r_x3": 408.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 312.0, + "t": 121.67, + "r": 328.0, + "b": 168.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 312.0, + "r_y0": 121.67, + "r_x1": 312.0, + "r_y1": 168.67, + "r_x2": 328.0, + "r_y2": 168.67, + "r_x3": 328.0, + "r_y3": 121.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 485.67, + "t": 232.67, + "r": 506.33, + "b": 364.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 485.67, + "r_y0": 232.67, + "r_x1": 485.67, + "r_y1": 364.0, + "r_x2": 506.33, + "r_y2": 364.0, + "r_x3": 506.33, + "r_y3": 232.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 459.0, + "t": 265.67, + "r": 475.0, + "b": 336.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 265.67, + "r_x1": 459.0, + "r_y1": 336.0, + "r_x2": 475.0, + "r_y2": 336.0, + "r_x3": 475.0, + "r_y3": 265.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 408.0, + "t": 247.0, + "r": 424.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 89.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 247.0, + "r_x1": 408.0, + "r_y1": 349.67, + "r_x2": 424.0, + "r_y2": 349.67, + "r_x3": 424.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 89.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 376.0, + "t": 277.0, + "r": 392.0, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 277.0, + "r_x1": 376.0, + "r_y1": 324.33, + "r_x2": 392.0, + "r_y2": 324.33, + "r_x3": 392.0, + "r_y3": 277.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 328.0, + "t": 247.0, + "r": 344.0, + "b": 349.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 247.0, + "r_x1": 328.0, + "r_y1": 349.67, + "r_x2": 344.0, + "r_y2": 349.67, + "r_x3": 344.0, + "r_y3": 247.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 296.67, + "t": 277.33, + "r": 312.67, + "b": 324.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 277.33, + "r_x1": 296.67, + "r_y1": 324.33, + "r_x2": 312.67, + "r_y2": 324.33, + "r_x3": 312.67, + "r_y3": 277.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 490.33, + "t": 406.33, + "r": 506.33, + "b": 518.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 490.33, + "r_y0": 406.33, + "r_x1": 490.33, + "r_y1": 518.33, + "r_x2": 506.33, + "r_y2": 518.33, + "r_x3": 506.33, + "r_y3": 406.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 459.0, + "t": 429.33, + "r": 475.0, + "b": 499.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 459.0, + "r_y0": 429.33, + "r_x1": 459.0, + "r_y1": 499.33, + "r_x2": 475.0, + "r_y2": 499.33, + "r_x3": 475.0, + "r_y3": 429.33, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 408.0, + "t": 408.33, + "r": 424.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 408.0, + "r_y0": 408.33, + "r_x1": 408.0, + "r_y1": 514.0, + "r_x2": 424.0, + "r_y2": 514.0, + "r_x3": 424.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 376.0, + "t": 440.67, + "r": 392.0, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 376.0, + "r_y0": 440.67, + "r_x1": 376.0, + "r_y1": 487.67, + "r_x2": 392.0, + "r_y2": 487.67, + "r_x3": 392.0, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 328.0, + "t": 408.33, + "r": 344.0, + "b": 514.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 328.0, + "r_y0": 408.33, + "r_x1": 328.0, + "r_y1": 514.0, + "r_x2": 344.0, + "r_y2": 514.0, + "r_x3": 344.0, + "r_y3": 408.33, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 296.67, + "t": 440.67, + "r": 312.67, + "b": 487.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 296.67, + "r_y0": 440.67, + "r_x1": 296.67, + "r_y1": 487.67, + "r_x2": 312.67, + "r_y2": 487.67, + "r_x3": 312.67, + "r_y3": 440.67, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 1, + "num_cols": 3, + "table_cells": [ + { + "bbox": { + "l": 443.33, + "t": 312.0, + "r": 490.33, + "b": 328.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.33, + "t": 296.67, + "r": 365.0, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Some other value", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 98.0, + "t": 296.67, + "r": 203.67, + "b": 344.0, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Yet another value", + "column_header": false, + "row_header": false, + "row_section": false + } + ] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt index d8b87216..0b13f376 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.doctags.txt @@ -1,3 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained -package + \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json index d1b4d37e..dd589a5b 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.json @@ -4,7 +4,7 @@ "name": "ocr_test_rotated_90", "origin": { "mimetype": "application/pdf", - "binary_hash": 6989291015361162334, + "binary_hash": 6752841177619701916, "filename": "ocr_test_rotated_90.pdf" }, "furniture": { @@ -18,10 +18,7 @@ "self_ref": "#/body", "children": [ { - "$ref": "#/texts/0" - }, - { - "$ref": "#/texts/1" + "$ref": "#/tables/0" } ], "content_layer": "body", @@ -29,71 +26,52 @@ "label": "unspecified" }, "groups": [], - "texts": [ + "texts": [], + "pictures": [], + "tables": [ { - "self_ref": "#/texts/0", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "furniture", - "label": "page_header", - "prov": [ - { - "page_no": 1, - "bbox": { - "l": 77.1, - "t": 506.07, - "r": 126.08, - "b": 71.88, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 86 - ] - } - ], - "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained", - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "self_ref": "#/texts/1", + "self_ref": "#/tables/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "body", - "label": "text", + "label": "table", "prov": [ { "page_no": 1, "bbox": { - "l": 131.21, - "t": 154.19, - "r": 152.2, - "b": 74.12, + "l": 87.65, + "t": 551.1, + "r": 334.58, + "b": 69.04, "coord_origin": "BOTTOMLEFT" }, "charspan": [ 0, - 7 + 0 ] } ], - "orig": "package", - "text": "package" + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [], + "num_rows": 0, + "num_cols": 0, + "grid": [] + }, + "annotations": [] } ], - "pictures": [], - "tables": [], "key_value_items": [], "form_items": [], "pages": { "1": { "size": { - "width": 841.92, - "height": 595.2 + "width": 612.0, + "height": 792.0 }, "page_no": 1 } diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md index 597acc76..e69de29b 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.md @@ -1 +0,0 @@ -package \ No newline at end of file diff --git a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json index 250f4bf6..186a44d5 100644 --- a/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json +++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.json @@ -2,8 +2,8 @@ { "page_no": 0, "size": { - "width": 841.92, - "height": 595.2 + "width": 612.0, + "height": 792.0 }, "parsed_page": { "dimension": { @@ -11,47 +11,47 @@ "rect": { "r_x0": 0.0, "r_y0": 0.0, - "r_x1": 595.2, + "r_x1": 792.0, "r_y1": 0.0, - "r_x2": 595.2, - "r_y2": 841.92, + "r_x2": 792.0, + "r_y2": 612.0, "r_x3": 0.0, - "r_y3": 841.92, + "r_y3": 612.0, "coord_origin": "BOTTOMLEFT" }, "boundary_type": "crop_box", "art_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "bleed_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "crop_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "media_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" }, "trim_bbox": { "l": 0.0, - "t": 841.92, - "r": 595.2, + "t": 612.0, + "r": 792.0, "b": 0.0, "coord_origin": "BOTTOMLEFT" } @@ -69,20 +69,20 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -94,20 +94,20 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true }, { @@ -119,20 +119,345 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], @@ -147,15 +472,15 @@ "clusters": [ { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 87.65, + "t": 240.9, + "r": 334.58, + "b": 722.96, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.979, "cells": [ { "index": 0, @@ -166,20 +491,20 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -191,37 +516,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ + }, { "index": 2, "rgba": { @@ -231,29 +541,2075 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.67, + "t": 602.0, + "r": 126.33, + "b": 694.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.67, + "b": 680.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.33, + "r": 220.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.33, + "r": 300.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.67, + "t": 428.0, + "r": 126.33, + "b": 559.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33, + "t": 442.33, + "r": 204.33, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.33, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.67, + "t": 468.0, + "r": 315.33, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.67, + "t": 274.0, + "r": 121.67, + "b": 385.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.67, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33, + "t": 278.0, + "r": 204.33, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33, + "r": 236.0, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.67, + "t": 304.33, + "r": 315.33, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] } ] }, "tablestructure": { - "table_map": {} + "table_map": { + "0": { + "label": "table", + "id": 0, + "page_no": 0, + "cluster": { + "id": 0, + "label": "table", + "bbox": { + "l": 87.65, + "t": 240.9, + "r": 334.58, + "b": 722.96, + "coord_origin": "TOPLEFT" + }, + "confidence": 0.979, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + }, + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.67, + "t": 602.0, + "r": 126.33, + "b": 694.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.67, + "b": 680.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.33, + "r": 220.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.33, + "r": 300.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.67, + "t": 428.0, + "r": 126.33, + "b": 559.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33, + "t": 442.33, + "r": 204.33, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.33, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.67, + "t": 468.0, + "r": 315.33, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.67, + "t": 274.0, + "r": 121.67, + "b": 385.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.67, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33, + "t": 278.0, + "r": 204.33, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33, + "r": 236.0, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.67, + "t": 304.33, + "r": 315.33, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] + }, + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] + } + } }, "figures_classification": null, "equations_prediction": null, @@ -262,20 +2618,20 @@ "assembled": { "elements": [ { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 87.65, + "t": 240.9, + "r": 334.58, + "b": 722.96, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.979, "cells": [ { "index": 0, @@ -286,20 +2642,20 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -311,43 +2667,22 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, "from_ocr": true - } - ], - "children": [] - }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" - }, - { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ + }, { "index": 2, "rgba": { @@ -357,92 +2692,1027 @@ "a": 255 }, "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, "coord_origin": "TOPLEFT" }, - "text": "package", - "orig": "package", + "text": "value", + "orig": "value", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.67, + "t": 602.0, + "r": 126.33, + "b": 694.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.67, + "b": 680.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.33, + "r": 220.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.33, + "r": 300.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.67, + "t": 428.0, + "r": 126.33, + "b": 559.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33, + "t": 442.33, + "r": 204.33, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.33, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.67, + "t": 468.0, + "r": 315.33, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.67, + "t": 274.0, + "r": 121.67, + "b": 385.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.67, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33, + "t": 278.0, + "r": 204.33, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33, + "r": 236.0, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.67, + "t": 304.33, + "r": 315.33, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "package" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } ], "body": [ { - "label": "text", - "id": 1, - "page_no": 0, - "cluster": { - "id": 1, - "label": "text", - "bbox": { - "l": 131.21, - "t": 441.01, - "r": 152.2, - "b": 521.08, - "coord_origin": "TOPLEFT" - }, - "confidence": 0.523, - "cells": [ - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 131.21, - "r_y0": 521.08, - "r_x1": 152.2, - "r_y1": 521.08, - "r_x2": 152.2, - "r_y2": 441.01, - "r_x3": 131.21, - "r_y3": 441.01, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "children": [] - }, - "text": "package" - } - ], - "headers": [ - { - "label": "page_header", + "label": "table", "id": 0, "page_no": 0, "cluster": { "id": 0, - "label": "page_header", + "label": "table", "bbox": { - "l": 77.1, - "t": 89.13, - "r": 126.08, - "b": 523.32, + "l": 87.65, + "t": 240.9, + "r": 334.58, + "b": 722.96, "coord_origin": "TOPLEFT" }, - "confidence": 0.602, + "confidence": 0.979, "cells": [ { "index": 0, @@ -453,20 +3723,20 @@ "a": 255 }, "rect": { - "r_x0": 77.1, - "r_y0": 520.76, - "r_x1": 96.68, - "r_y1": 520.76, - "r_x2": 96.68, - "r_y2": 89.24, - "r_x3": 77.1, - "r_y3": 89.24, + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, "coord_origin": "TOPLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", + "text": "Vertically", + "orig": "Vertically", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 90.0, "from_ocr": true }, { @@ -478,28 +3748,1037 @@ "a": 255 }, "rect": { - "r_x0": 100.64, - "r_y0": 523.32, - "r_x1": 126.08, - "r_y1": 523.32, - "r_x2": 126.08, - "r_y2": 89.13, - "r_x3": 100.64, - "r_y3": 89.13, + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, "coord_origin": "TOPLEFT" }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", + "text": "merged", + "orig": "merged", "text_direction": "left_to_right", - "confidence": 1.0, + "confidence": 92.0, + "from_ocr": true + }, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + }, + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + }, + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + }, + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + }, + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + }, + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, "from_ocr": true } ], - "children": [] + "children": [ + { + "id": 1, + "label": "text", + "bbox": { + "l": 105.67, + "t": 602.0, + "r": 126.33, + "b": 694.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 90.0, + "cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 694.67, + "r_x1": 126.33, + "r_y1": 602.0, + "r_x2": 105.67, + "r_y2": 602.0, + "r_x3": 105.67, + "r_y3": 694.67, + "coord_origin": "TOPLEFT" + }, + "text": "Vertically", + "orig": "Vertically", + "text_direction": "left_to_right", + "confidence": 90.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 2, + "label": "text", + "bbox": { + "l": 137.0, + "t": 610.0, + "r": 157.67, + "b": 680.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 92.0, + "cells": [ + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 157.67, + "r_y0": 680.33, + "r_x1": 157.67, + "r_y1": 610.0, + "r_x2": 137.0, + "r_y2": 610.0, + "r_x3": 137.0, + "r_y3": 680.33, + "coord_origin": "TOPLEFT" + }, + "text": "merged", + "orig": "merged", + "text_direction": "left_to_right", + "confidence": 92.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 3, + "label": "text", + "bbox": { + "l": 204.0, + "t": 623.33, + "r": 220.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 220.0, + "r_y0": 670.33, + "r_x1": 220.0, + "r_y1": 623.33, + "r_x2": 204.0, + "r_y2": 623.33, + "r_x3": 204.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 4, + "label": "text", + "bbox": { + "l": 284.0, + "t": 623.33, + "r": 300.0, + "b": 670.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 3, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 300.0, + "r_y0": 670.33, + "r_x1": 300.0, + "r_y1": 623.33, + "r_x2": 284.0, + "r_y2": 623.33, + "r_x3": 284.0, + "r_y3": 670.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 5, + "label": "text", + "bbox": { + "l": 105.67, + "t": 428.0, + "r": 126.33, + "b": 559.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 91.0, + "cells": [ + { + "index": 4, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 126.33, + "r_y0": 559.67, + "r_x1": 126.33, + "r_y1": 428.0, + "r_x2": 105.67, + "r_y2": 428.0, + "r_x3": 105.67, + "r_y3": 559.67, + "coord_origin": "TOPLEFT" + }, + "text": "Other merged", + "orig": "Other merged", + "text_direction": "left_to_right", + "confidence": 91.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 6, + "label": "text", + "bbox": { + "l": 137.0, + "t": 456.0, + "r": 153.0, + "b": 526.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 5, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 526.67, + "r_x1": 153.0, + "r_y1": 456.0, + "r_x2": 137.0, + "r_y2": 456.0, + "r_x3": 137.0, + "r_y3": 526.67, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 7, + "label": "text", + "bbox": { + "l": 188.33, + "t": 442.33, + "r": 204.33, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 6, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 545.0, + "r_x1": 204.33, + "r_y1": 442.33, + "r_x2": 188.33, + "r_y2": 442.33, + "r_x3": 188.33, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 8, + "label": "text", + "bbox": { + "l": 220.0, + "t": 468.0, + "r": 236.0, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 7, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 515.0, + "r_x1": 236.0, + "r_y1": 468.0, + "r_x2": 220.0, + "r_y2": 468.0, + "r_x3": 220.0, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 9, + "label": "text", + "bbox": { + "l": 268.0, + "t": 442.33, + "r": 284.0, + "b": 545.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 93.0, + "cells": [ + { + "index": 8, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 545.0, + "r_x1": 284.0, + "r_y1": 442.33, + "r_x2": 268.0, + "r_y2": 442.33, + "r_x3": 268.0, + "r_y3": 545.0, + "coord_origin": "TOPLEFT" + }, + "text": "Some other", + "orig": "Some other", + "text_direction": "left_to_right", + "confidence": 93.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 10, + "label": "text", + "bbox": { + "l": 299.67, + "t": 468.0, + "r": 315.33, + "b": 515.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 96.0, + "cells": [ + { + "index": 9, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 515.0, + "r_x1": 315.33, + "r_y1": 468.0, + "r_x2": 299.67, + "r_y2": 468.0, + "r_x3": 299.67, + "r_y3": 515.0, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 96.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 11, + "label": "text", + "bbox": { + "l": 105.67, + "t": 274.0, + "r": 121.67, + "b": 385.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 10, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 121.67, + "r_y0": 385.67, + "r_x1": 121.67, + "r_y1": 274.0, + "r_x2": 105.67, + "r_y2": 274.0, + "r_x3": 105.67, + "r_y3": 385.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 12, + "label": "text", + "bbox": { + "l": 137.0, + "t": 292.67, + "r": 153.0, + "b": 363.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 94.0, + "cells": [ + { + "index": 11, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 153.0, + "r_y0": 363.0, + "r_x1": 153.0, + "r_y1": 292.67, + "r_x2": 137.0, + "r_y2": 292.67, + "r_x3": 137.0, + "r_y3": 363.0, + "coord_origin": "TOPLEFT" + }, + "text": "column", + "orig": "column", + "text_direction": "left_to_right", + "confidence": 94.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 13, + "label": "text", + "bbox": { + "l": 188.33, + "t": 278.0, + "r": 204.33, + "b": 384.0, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 12, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 204.33, + "r_y0": 384.0, + "r_x1": 204.33, + "r_y1": 278.0, + "r_x2": 188.33, + "r_y2": 278.0, + "r_x3": 188.33, + "r_y3": 384.0, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 14, + "label": "text", + "bbox": { + "l": 220.0, + "t": 304.33, + "r": 236.0, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 13, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 236.0, + "r_y0": 351.33, + "r_x1": 236.0, + "r_y1": 304.33, + "r_x2": 220.0, + "r_y2": 304.33, + "r_x3": 220.0, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 15, + "label": "text", + "bbox": { + "l": 268.0, + "t": 278.0, + "r": 284.0, + "b": 383.67, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 14, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 284.0, + "r_y0": 383.67, + "r_x1": 284.0, + "r_y1": 278.0, + "r_x2": 268.0, + "r_y2": 278.0, + "r_x3": 268.0, + "r_y3": 383.67, + "coord_origin": "TOPLEFT" + }, + "text": "Yet another", + "orig": "Yet another", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + }, + { + "id": 16, + "label": "text", + "bbox": { + "l": 299.67, + "t": 304.33, + "r": 315.33, + "b": 351.33, + "coord_origin": "TOPLEFT" + }, + "confidence": 95.0, + "cells": [ + { + "index": 15, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 315.33, + "r_y0": 351.33, + "r_x1": 315.33, + "r_y1": 304.33, + "r_x2": 299.67, + "r_y2": 304.33, + "r_x3": 299.67, + "r_y3": 351.33, + "coord_origin": "TOPLEFT" + }, + "text": "value", + "orig": "value", + "text_direction": "left_to_right", + "confidence": 95.0, + "from_ocr": true + } + ], + "children": [] + } + ] }, - "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained" + "text": null, + "otsl_seq": [ + "ched", + "ched", + "ched", + "nl", + "fcel", + "fcel", + "fcel", + "nl", + "fcel", + "fcel", + "fcel", + "nl" + ], + "num_rows": 0, + "num_cols": 0, + "table_cells": [] } - ] + ], + "headers": [] } } ] \ No newline at end of file diff --git a/tests/data_scanned/ocr_test.pdf b/tests/data_scanned/ocr_test.pdf index b79f3c28..7d14233e 100644 Binary files a/tests/data_scanned/ocr_test.pdf and b/tests/data_scanned/ocr_test.pdf differ diff --git a/tests/data_scanned/ocr_test_rotated_180.pdf b/tests/data_scanned/ocr_test_rotated_180.pdf index 1c030b49..959da830 100644 Binary files a/tests/data_scanned/ocr_test_rotated_180.pdf and b/tests/data_scanned/ocr_test_rotated_180.pdf differ diff --git a/tests/data_scanned/ocr_test_rotated_270.pdf b/tests/data_scanned/ocr_test_rotated_270.pdf index a6e90baf..ebd482db 100644 Binary files a/tests/data_scanned/ocr_test_rotated_270.pdf and b/tests/data_scanned/ocr_test_rotated_270.pdf differ diff --git a/tests/data_scanned/ocr_test_rotated_90.pdf b/tests/data_scanned/ocr_test_rotated_90.pdf index bd08daae..f1447723 100644 Binary files a/tests/data_scanned/ocr_test_rotated_90.pdf and b/tests/data_scanned/ocr_test_rotated_90.pdf differ diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index a19a4090..e67ea05f 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -73,8 +73,8 @@ def test_e2e_conversions(): # only works on mac if "darwin" == sys.platform: - engines.append((OcrMacOptions(), True)) - engines.append((OcrMacOptions(force_full_page_ocr=True), True)) + engines.append((OcrMacOptions(), False)) + engines.append((OcrMacOptions(force_full_page_ocr=True), False)) for ocr_options, supports_rotation in engines: print(