diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json index 732403c0..2d66e8f9 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json @@ -5,84 +5,159 @@ "width": 2000.0, "height": 2829.0 }, - "cells": [ - { - "index": 0, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, + "parsed_page": { + "dimension": { + "angle": 0.0, "rect": { - "r_x0": 246.4065456254215, - "r_y0": 329.06770715202435, - "r_x1": 1691.991797818404, - "r_y1": 329.06770715202435, - "r_x2": 1691.991797818404, - "r_y2": 258.9040166758338, - "r_x3": 246.4065456254215, - "r_y3": 258.9040166758338, - "coord_origin": "TOPLEFT" + "r_x0": 0.0, + "r_y0": 0.0, + "r_x1": 2000.0, + "r_y1": 0.0, + "r_x2": 2000.0, + "r_y2": 2829.0, + "r_x3": 0.0, + "r_y3": 2829.0, + "coord_origin": "BOTTOMLEFT" }, - "text": "Docling bundles PDF document conversion to", - "orig": "Docling bundles PDF document conversion to", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true + "boundary_type": "crop_box", + "art_bbox": { + "l": 0.0, + "t": 2829.0, + "r": 2000.0, + "b": 0.0, + "coord_origin": "BOTTOMLEFT" + }, + "bleed_bbox": { + "l": 0.0, + "t": 2829.0, + "r": 2000.0, + "b": 0.0, + "coord_origin": "BOTTOMLEFT" + }, + "crop_bbox": { + "l": 0.0, + "t": 2829.0, + "r": 2000.0, + "b": 0.0, + "coord_origin": "BOTTOMLEFT" + }, + "media_bbox": { + "l": 0.0, + "t": 2829.0, + "r": 2000.0, + "b": 0.0, + "coord_origin": "BOTTOMLEFT" + }, + "trim_bbox": { + "l": 0.0, + "t": 2829.0, + "r": 2000.0, + "b": 0.0, + "coord_origin": "BOTTOMLEFT" + } }, - { - "index": 1, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 + "bitmap_resources": [ + { + "index": 0, + "rect": { + "r_x0": 0.0, + "r_y0": 0.0, + "r_x1": 2000.0, + "r_y1": 0.0, + "r_x2": 2000.0, + "r_y2": 2829.0, + "r_x3": 0.0, + "r_y3": 2829.0, + "coord_origin": "BOTTOMLEFT" + }, + "uri": null + } + ], + "char_cells": [], + "word_cells": [], + "textline_cells": [ + { + "index": 0, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 246.4065456254215, + "r_y0": 329.06770715202435, + "r_x1": 1691.991797818404, + "r_y1": 329.06770715202435, + "r_x2": 1691.991797818404, + "r_y2": 258.9040166758338, + "r_x3": 246.4065456254215, + "r_y3": 258.9040166758338, + "coord_origin": "TOPLEFT" + }, + "text": "Docling bundles PDF document conversion to", + "orig": "Docling bundles PDF document conversion to", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": true }, - "rect": { - "r_x0": 234.08627147881114, - "r_y0": 419.5788697734327, - "r_x1": 1696.0985042090742, - "r_y1": 419.5788697734327, - "r_x2": 1696.0985042090742, - "r_y2": 349.4151792972422, - "r_x3": 234.08627147881114, - "r_y3": 349.4151792972422, - "coord_origin": "TOPLEFT" + { + "index": 1, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 234.08627147881114, + "r_y0": 419.5788697734327, + "r_x1": 1696.0985042090742, + "r_y1": 419.5788697734327, + "r_x2": 1696.0985042090742, + "r_y2": 349.4151792972422, + "r_x3": 234.08627147881114, + "r_y3": 349.4151792972422, + "coord_origin": "TOPLEFT" + }, + "text": "JSON and Markdown in an easy self contained", + "orig": "JSON and Markdown in an easy self contained", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": true }, - "text": "JSON and Markdown in an easy self contained", - "orig": "JSON and Markdown in an easy self contained", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - }, - { - "index": 2, - "rgba": { - "r": 0, - "g": 0, - "b": 0, - "a": 255 - }, - "rect": { - "r_x0": 242.29979922858777, - "r_y0": 509.8779072023336, - "r_x1": 513.3470125989277, - "r_y1": 509.8779072023336, - "r_x2": 513.3470125989277, - "r_y2": 439.9752910477536, - "r_x3": 242.29979922858777, - "r_y3": 439.9752910477536, - "coord_origin": "TOPLEFT" - }, - "text": "package", - "orig": "package", - "text_direction": "left_to_right", - "confidence": 1.0, - "from_ocr": true - } - ], - "parsed_page": null, + { + "index": 2, + "rgba": { + "r": 0, + "g": 0, + "b": 0, + "a": 255 + }, + "rect": { + "r_x0": 242.29979922858777, + "r_y0": 509.8779072023336, + "r_x1": 513.3470125989277, + "r_y1": 509.8779072023336, + "r_x2": 513.3470125989277, + "r_y2": 439.9752910477536, + "r_x3": 242.29979922858777, + "r_y3": 439.9752910477536, + "coord_origin": "TOPLEFT" + }, + "text": "package", + "orig": "package", + "text_direction": "left_to_right", + "confidence": 1.0, + "from_ocr": true + } + ], + "has_chars": false, + "has_words": false, + "has_lines": true, + "image": null, + "lines": [] + }, "predictions": { "layout": { "clusters": [