mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
ci: update docling-parse and remove pages.json (#2372)
* update docling-parse and remove pages.json Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * ocr gt Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -1,447 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"parsed_page": {
|
||||
"dimension": {
|
||||
"angle": 0.0,
|
||||
"rect": {
|
||||
"r_x0": 0.0,
|
||||
"r_y0": 0.0,
|
||||
"r_x1": 595.201171875,
|
||||
"r_y1": 0.0,
|
||||
"r_x2": 595.201171875,
|
||||
"r_y2": 841.9216918945312,
|
||||
"r_x3": 0.0,
|
||||
"r_y3": 841.9216918945312,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"boundary_type": "crop_box",
|
||||
"art_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"bleed_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"crop_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"media_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"trim_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
}
|
||||
},
|
||||
"bitmap_resources": [],
|
||||
"char_cells": [],
|
||||
"word_cells": [],
|
||||
"textline_cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.34702132031646,
|
||||
"r_y0": 97.99999977896755,
|
||||
"r_x1": 503.64955224479564,
|
||||
"r_y1": 97.99999977896755,
|
||||
"r_x2": 503.64955224479564,
|
||||
"r_y2": 76.99999977896756,
|
||||
"r_x3": 73.34702132031646,
|
||||
"r_y3": 76.99999977896756,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"has_chars": false,
|
||||
"has_words": false,
|
||||
"has_lines": true,
|
||||
"image": null,
|
||||
"lines": []
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.34702132031646,
|
||||
"r_y0": 97.99999977896755,
|
||||
"r_x1": 503.64955224479564,
|
||||
"r_y1": 97.99999977896755,
|
||||
"r_x2": 503.64955224479564,
|
||||
"r_y2": 76.99999977896756,
|
||||
"r_x3": 73.34702132031646,
|
||||
"r_y3": 76.99999977896756,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.34702132031646,
|
||||
"r_y0": 97.99999977896755,
|
||||
"r_x1": 503.64955224479564,
|
||||
"r_y1": 97.99999977896755,
|
||||
"r_x2": 503.64955224479564,
|
||||
"r_y2": 76.99999977896756,
|
||||
"r_x3": 73.34702132031646,
|
||||
"r_y3": 76.99999977896756,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.34702132031646,
|
||||
"r_y0": 97.99999977896755,
|
||||
"r_x1": 503.64955224479564,
|
||||
"r_y1": 97.99999977896755,
|
||||
"r_x2": 503.64955224479564,
|
||||
"r_y2": 76.99999977896756,
|
||||
"r_x3": 73.34702132031646,
|
||||
"r_y3": 76.99999977896756,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1 +0,0 @@
|
||||
[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
|
||||
@@ -1,504 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"parsed_page": {
|
||||
"dimension": {
|
||||
"angle": 0.0,
|
||||
"rect": {
|
||||
"r_x0": 0.0,
|
||||
"r_y0": 0.0,
|
||||
"r_x1": 595.201171875,
|
||||
"r_y1": 0.0,
|
||||
"r_x2": 595.201171875,
|
||||
"r_y2": 841.9216918945312,
|
||||
"r_x3": 0.0,
|
||||
"r_y3": 841.9216918945312,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"boundary_type": "crop_box",
|
||||
"art_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"bleed_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"crop_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"media_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"trim_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
}
|
||||
},
|
||||
"bitmap_resources": [],
|
||||
"char_cells": [],
|
||||
"word_cells": [],
|
||||
"textline_cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.2388782764286,
|
||||
"r_y0": 764.898293373551,
|
||||
"r_x1": 521.9863147998661,
|
||||
"r_y1": 764.898293373551,
|
||||
"r_x2": 521.9863147998661,
|
||||
"r_y2": 744.0929853494625,
|
||||
"r_x3": 89.2388782764286,
|
||||
"r_y3": 744.0929853494625,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.23887497045128,
|
||||
"r_y0": 739.1977118987292,
|
||||
"r_x1": 523.208764293368,
|
||||
"r_y1": 739.1977118987292,
|
||||
"r_x2": 523.208764293368,
|
||||
"r_y2": 717.1685676116198,
|
||||
"r_x3": 89.23887497045128,
|
||||
"r_y3": 717.1685676116198,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.2561096985719,
|
||||
"r_y0": 710.0268078458798,
|
||||
"r_x1": 522.0347860494834,
|
||||
"r_y1": 710.0268078458798,
|
||||
"r_x2": 522.0347860494834,
|
||||
"r_y2": 690.0429592741025,
|
||||
"r_x3": 441.2561096985719,
|
||||
"r_y3": 690.0429592741025,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"has_chars": false,
|
||||
"has_words": false,
|
||||
"has_lines": true,
|
||||
"image": null,
|
||||
"lines": []
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.23887497045128,
|
||||
"t": 717.1685676116198,
|
||||
"r": 523.208764293368,
|
||||
"b": 764.898293373551,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.2388782764286,
|
||||
"r_y0": 764.898293373551,
|
||||
"r_x1": 521.9863147998661,
|
||||
"r_y1": 764.898293373551,
|
||||
"r_x2": 521.9863147998661,
|
||||
"r_y2": 744.0929853494625,
|
||||
"r_x3": 89.2388782764286,
|
||||
"r_y3": 744.0929853494625,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.23887497045128,
|
||||
"r_y0": 739.1977118987292,
|
||||
"r_x1": 523.208764293368,
|
||||
"r_y1": 739.1977118987292,
|
||||
"r_x2": 523.208764293368,
|
||||
"r_y2": 717.1685676116198,
|
||||
"r_x3": 89.23887497045128,
|
||||
"r_y3": 717.1685676116198,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.2561096985719,
|
||||
"t": 690.0429592741025,
|
||||
"r": 522.0347860494834,
|
||||
"b": 710.0268078458798,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.2561096985719,
|
||||
"r_y0": 710.0268078458798,
|
||||
"r_x1": 522.0347860494834,
|
||||
"r_y1": 710.0268078458798,
|
||||
"r_x2": 522.0347860494834,
|
||||
"r_y2": 690.0429592741025,
|
||||
"r_x3": 441.2561096985719,
|
||||
"r_y3": 690.0429592741025,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.23887497045128,
|
||||
"t": 717.1685676116198,
|
||||
"r": 523.208764293368,
|
||||
"b": 764.898293373551,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.2388782764286,
|
||||
"r_y0": 764.898293373551,
|
||||
"r_x1": 521.9863147998661,
|
||||
"r_y1": 764.898293373551,
|
||||
"r_x2": 521.9863147998661,
|
||||
"r_y2": 744.0929853494625,
|
||||
"r_x3": 89.2388782764286,
|
||||
"r_y3": 744.0929853494625,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.23887497045128,
|
||||
"r_y0": 739.1977118987292,
|
||||
"r_x1": 523.208764293368,
|
||||
"r_y1": 739.1977118987292,
|
||||
"r_x2": 523.208764293368,
|
||||
"r_y2": 717.1685676116198,
|
||||
"r_x3": 89.23887497045128,
|
||||
"r_y3": 717.1685676116198,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.2561096985719,
|
||||
"t": 690.0429592741025,
|
||||
"r": 522.0347860494834,
|
||||
"b": 710.0268078458798,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.2561096985719,
|
||||
"r_y0": 710.0268078458798,
|
||||
"r_x1": 522.0347860494834,
|
||||
"r_y1": 710.0268078458798,
|
||||
"r_x2": 522.0347860494834,
|
||||
"r_y2": 690.0429592741025,
|
||||
"r_x3": 441.2561096985719,
|
||||
"r_y3": 690.0429592741025,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.23887497045128,
|
||||
"t": 717.1685676116198,
|
||||
"r": 523.208764293368,
|
||||
"b": 764.898293373551,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.2388782764286,
|
||||
"r_y0": 764.898293373551,
|
||||
"r_x1": 521.9863147998661,
|
||||
"r_y1": 764.898293373551,
|
||||
"r_x2": 521.9863147998661,
|
||||
"r_y2": 744.0929853494625,
|
||||
"r_x3": 89.2388782764286,
|
||||
"r_y3": 744.0929853494625,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.23887497045128,
|
||||
"r_y0": 739.1977118987292,
|
||||
"r_x1": 523.208764293368,
|
||||
"r_y1": 739.1977118987292,
|
||||
"r_x2": 523.208764293368,
|
||||
"r_y2": 717.1685676116198,
|
||||
"r_x3": 89.23887497045128,
|
||||
"r_y3": 717.1685676116198,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.2561096985719,
|
||||
"t": 690.0429592741025,
|
||||
"r": 522.0347860494834,
|
||||
"b": 710.0268078458798,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.2561096985719,
|
||||
"r_y0": 710.0268078458798,
|
||||
"r_x1": 522.0347860494834,
|
||||
"r_y1": 710.0268078458798,
|
||||
"r_x2": 522.0347860494834,
|
||||
"r_y2": 690.0429592741025,
|
||||
"r_x3": 441.2561096985719,
|
||||
"r_y3": 690.0429592741025,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1,505 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"parsed_page": {
|
||||
"dimension": {
|
||||
"angle": 0.0,
|
||||
"rect": {
|
||||
"r_x0": 0.0,
|
||||
"r_y0": 0.0,
|
||||
"r_x1": 595.201171875,
|
||||
"r_y1": 0.0,
|
||||
"r_x2": 595.201171875,
|
||||
"r_y2": 841.9216918945312,
|
||||
"r_x3": 0.0,
|
||||
"r_y3": 841.9216918945312,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"boundary_type": "crop_box",
|
||||
"art_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"bleed_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"crop_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"media_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"trim_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
}
|
||||
},
|
||||
"bitmap_resources": [],
|
||||
"char_cells": [],
|
||||
"word_cells": [],
|
||||
"textline_cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.168585936602,
|
||||
"r_y0": 504.8720061466397,
|
||||
"r_x1": 737.9738558137178,
|
||||
"r_y1": 504.8720061466397,
|
||||
"r_x2": 737.9738558137178,
|
||||
"r_y2": 70.90211682372312,
|
||||
"r_x3": 717.168585936602,
|
||||
"r_y3": 70.90211682372312,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.2441821046808,
|
||||
"r_y0": 152.80629773131633,
|
||||
"r_x1": 709.8255852011977,
|
||||
"r_y1": 152.80629773131633,
|
||||
"r_x2": 709.8255852011977,
|
||||
"r_y2": 72.124570639845,
|
||||
"r_x3": 690.2441821046808,
|
||||
"r_y3": 72.124570639845,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"has_chars": false,
|
||||
"has_words": false,
|
||||
"has_lines": true,
|
||||
"image": null,
|
||||
"lines": []
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.168585936602,
|
||||
"t": 70.90211682372312,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720061466397,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.168585936602,
|
||||
"r_y0": 504.8720061466397,
|
||||
"r_x1": 737.9738558137178,
|
||||
"r_y1": 504.8720061466397,
|
||||
"r_x2": 737.9738558137178,
|
||||
"r_y2": 70.90211682372312,
|
||||
"r_x3": 717.168585936602,
|
||||
"r_y3": 70.90211682372312,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.2441821046808,
|
||||
"t": 72.124570639845,
|
||||
"r": 709.8255852011977,
|
||||
"b": 152.80629773131633,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.2441821046808,
|
||||
"r_y0": 152.80629773131633,
|
||||
"r_x1": 709.8255852011977,
|
||||
"r_y1": 152.80629773131633,
|
||||
"r_x2": 709.8255852011977,
|
||||
"r_y2": 72.124570639845,
|
||||
"r_x3": 690.2441821046808,
|
||||
"r_y3": 72.124570639845,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.168585936602,
|
||||
"t": 70.90211682372312,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720061466397,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.168585936602,
|
||||
"r_y0": 504.8720061466397,
|
||||
"r_x1": 737.9738558137178,
|
||||
"r_y1": 504.8720061466397,
|
||||
"r_x2": 737.9738558137178,
|
||||
"r_y2": 70.90211682372312,
|
||||
"r_x3": 717.168585936602,
|
||||
"r_y3": 70.90211682372312,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.2441821046808,
|
||||
"t": 72.124570639845,
|
||||
"r": 709.8255852011977,
|
||||
"b": 152.80629773131633,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.2441821046808,
|
||||
"r_y0": 152.80629773131633,
|
||||
"r_x1": 709.8255852011977,
|
||||
"r_y1": 152.80629773131633,
|
||||
"r_x2": 709.8255852011977,
|
||||
"r_y2": 72.124570639845,
|
||||
"r_x3": 690.2441821046808,
|
||||
"r_y3": 72.124570639845,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.2441821046808,
|
||||
"t": 72.124570639845,
|
||||
"r": 709.8255852011977,
|
||||
"b": 152.80629773131633,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.2441821046808,
|
||||
"r_y0": 152.80629773131633,
|
||||
"r_x1": 709.8255852011977,
|
||||
"r_y1": 152.80629773131633,
|
||||
"r_x2": 709.8255852011977,
|
||||
"r_y2": 72.124570639845,
|
||||
"r_x3": 690.2441821046808,
|
||||
"r_y3": 72.124570639845,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.168585936602,
|
||||
"t": 70.90211682372312,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720061466397,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.168585936602,
|
||||
"r_y0": 504.8720061466397,
|
||||
"r_x1": 737.9738558137178,
|
||||
"r_y1": 504.8720061466397,
|
||||
"r_x2": 737.9738558137178,
|
||||
"r_y2": 70.90211682372312,
|
||||
"r_x3": 717.168585936602,
|
||||
"r_y3": 70.90211682372312,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1,505 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"parsed_page": {
|
||||
"dimension": {
|
||||
"angle": 0.0,
|
||||
"rect": {
|
||||
"r_x0": 0.0,
|
||||
"r_y0": 0.0,
|
||||
"r_x1": 595.201171875,
|
||||
"r_y1": 0.0,
|
||||
"r_x2": 595.201171875,
|
||||
"r_y2": 841.9216918945312,
|
||||
"r_x3": 0.0,
|
||||
"r_y3": 841.9216918945312,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"boundary_type": "crop_box",
|
||||
"art_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"bleed_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"crop_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"media_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"trim_bbox": {
|
||||
"l": 0.0,
|
||||
"t": 841.9216918945312,
|
||||
"r": 595.201171875,
|
||||
"b": 0.0,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
}
|
||||
},
|
||||
"bitmap_resources": [],
|
||||
"char_cells": [],
|
||||
"word_cells": [],
|
||||
"textline_cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171545548258,
|
||||
"r_y0": 520.7638571913312,
|
||||
"r_x1": 96.68315797053792,
|
||||
"r_y1": 520.7638571913312,
|
||||
"r_x2": 96.68315797053792,
|
||||
"r_y2": 89.2388734673729,
|
||||
"r_x3": 77.10171545548258,
|
||||
"r_y3": 89.2388734673729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64168123325977,
|
||||
"r_y0": 523.3236155182395,
|
||||
"r_x1": 126.08064862014129,
|
||||
"r_y1": 523.3236155182395,
|
||||
"r_x2": 126.08064862014129,
|
||||
"r_y2": 89.1266754140729,
|
||||
"r_x3": 100.64168123325977,
|
||||
"r_y3": 89.1266754140729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"has_chars": false,
|
||||
"has_words": false,
|
||||
"has_lines": true,
|
||||
"image": null,
|
||||
"lines": []
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171545548258,
|
||||
"t": 89.1266754140729,
|
||||
"r": 126.08064862014129,
|
||||
"b": 523.3236155182395,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171545548258,
|
||||
"r_y0": 520.7638571913312,
|
||||
"r_x1": 96.68315797053792,
|
||||
"r_y1": 520.7638571913312,
|
||||
"r_x2": 96.68315797053792,
|
||||
"r_y2": 89.2388734673729,
|
||||
"r_x3": 77.10171545548258,
|
||||
"r_y3": 89.2388734673729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64168123325977,
|
||||
"r_y0": 523.3236155182395,
|
||||
"r_x1": 126.08064862014129,
|
||||
"r_y1": 523.3236155182395,
|
||||
"r_x2": 126.08064862014129,
|
||||
"r_y2": 89.1266754140729,
|
||||
"r_x3": 100.64168123325977,
|
||||
"r_y3": 89.1266754140729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171545548258,
|
||||
"t": 89.1266754140729,
|
||||
"r": 126.08064862014129,
|
||||
"b": 523.3236155182395,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171545548258,
|
||||
"r_y0": 520.7638571913312,
|
||||
"r_x1": 96.68315797053792,
|
||||
"r_y1": 520.7638571913312,
|
||||
"r_x2": 96.68315797053792,
|
||||
"r_y2": 89.2388734673729,
|
||||
"r_x3": 77.10171545548258,
|
||||
"r_y3": 89.2388734673729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64168123325977,
|
||||
"r_y0": 523.3236155182395,
|
||||
"r_x1": 126.08064862014129,
|
||||
"r_y1": 523.3236155182395,
|
||||
"r_x2": 126.08064862014129,
|
||||
"r_y2": 89.1266754140729,
|
||||
"r_x3": 100.64168123325977,
|
||||
"r_y3": 89.1266754140729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171545548258,
|
||||
"t": 89.1266754140729,
|
||||
"r": 126.08064862014129,
|
||||
"b": 523.3236155182395,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171545548258,
|
||||
"r_y0": 520.7638571913312,
|
||||
"r_x1": 96.68315797053792,
|
||||
"r_y1": 520.7638571913312,
|
||||
"r_x2": 96.68315797053792,
|
||||
"r_y2": 89.2388734673729,
|
||||
"r_x3": 77.10171545548258,
|
||||
"r_y3": 89.2388734673729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64168123325977,
|
||||
"r_y0": 523.3236155182395,
|
||||
"r_x1": 126.08064862014129,
|
||||
"r_y1": 523.3236155182395,
|
||||
"r_x2": 126.08064862014129,
|
||||
"r_y2": 89.1266754140729,
|
||||
"r_x3": 100.64168123325977,
|
||||
"r_y3": 89.1266754140729,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1,311 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.2,
|
||||
"height": 841.92
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.68,
|
||||
"t": 77.0,
|
||||
"r": 504.87,
|
||||
"b": 152.91,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.862,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.35,
|
||||
"r_y0": 98.0,
|
||||
"r_x1": 503.65,
|
||||
"r_y1": 98.0,
|
||||
"r_x2": 503.65,
|
||||
"r_y2": 77.0,
|
||||
"r_x3": 73.35,
|
||||
"r_y3": 77.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.68,
|
||||
"r_y0": 124.83,
|
||||
"r_x1": 504.87,
|
||||
"r_y1": 124.83,
|
||||
"r_x2": 504.87,
|
||||
"r_y2": 104.0,
|
||||
"r_x3": 69.68,
|
||||
"r_y3": 104.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84,
|
||||
"r_y0": 152.91,
|
||||
"r_x1": 153.09,
|
||||
"r_y1": 152.91,
|
||||
"r_x2": 153.09,
|
||||
"r_y2": 129.8,
|
||||
"r_x3": 71.84,
|
||||
"r_y3": 129.8,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.68,
|
||||
"t": 77.0,
|
||||
"r": 504.87,
|
||||
"b": 152.91,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.862,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.35,
|
||||
"r_y0": 98.0,
|
||||
"r_x1": 503.65,
|
||||
"r_y1": 98.0,
|
||||
"r_x2": 503.65,
|
||||
"r_y2": 77.0,
|
||||
"r_x3": 73.35,
|
||||
"r_y3": 77.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.68,
|
||||
"r_y0": 124.83,
|
||||
"r_x1": 504.87,
|
||||
"r_y1": 124.83,
|
||||
"r_x2": 504.87,
|
||||
"r_y2": 104.0,
|
||||
"r_x3": 69.68,
|
||||
"r_y3": 104.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84,
|
||||
"r_y0": 152.91,
|
||||
"r_x1": 153.09,
|
||||
"r_y1": 152.91,
|
||||
"r_x2": 153.09,
|
||||
"r_y2": 129.8,
|
||||
"r_x3": 71.84,
|
||||
"r_y3": 129.8,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.68,
|
||||
"t": 77.0,
|
||||
"r": 504.87,
|
||||
"b": 152.91,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.862,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 73.35,
|
||||
"r_y0": 98.0,
|
||||
"r_x1": 503.65,
|
||||
"r_y1": 98.0,
|
||||
"r_x2": 503.65,
|
||||
"r_y2": 77.0,
|
||||
"r_x3": 73.35,
|
||||
"r_y3": 77.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.68,
|
||||
"r_y0": 124.83,
|
||||
"r_x1": 504.87,
|
||||
"r_y1": 124.83,
|
||||
"r_x2": 504.87,
|
||||
"r_y2": 104.0,
|
||||
"r_x3": 69.68,
|
||||
"r_y3": 104.0,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84,
|
||||
"r_y0": 152.91,
|
||||
"r_x1": 153.09,
|
||||
"r_y1": 152.91,
|
||||
"r_x2": 153.09,
|
||||
"r_y2": 129.8,
|
||||
"r_x3": 71.84,
|
||||
"r_y3": 129.8,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
},
|
||||
"parsed_page": null
|
||||
}
|
||||
]
|
||||
5
tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.meta.json
vendored
Normal file
5
tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.meta.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"num_cells": 0
|
||||
}
|
||||
]
|
||||
@@ -1,311 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.2,
|
||||
"height": 841.92
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.24,
|
||||
"t": 690.04,
|
||||
"r": 523.21,
|
||||
"b": 764.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.787,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 764.9,
|
||||
"r_x1": 521.99,
|
||||
"r_y1": 764.9,
|
||||
"r_x2": 521.99,
|
||||
"r_y2": 744.09,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 744.09,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 739.2,
|
||||
"r_x1": 523.21,
|
||||
"r_y1": 739.2,
|
||||
"r_x2": 523.21,
|
||||
"r_y2": 717.17,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 717.17,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.26,
|
||||
"r_y0": 710.03,
|
||||
"r_x1": 522.03,
|
||||
"r_y1": 710.03,
|
||||
"r_x2": 522.03,
|
||||
"r_y2": 690.04,
|
||||
"r_x3": 441.26,
|
||||
"r_y3": 690.04,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.24,
|
||||
"t": 690.04,
|
||||
"r": 523.21,
|
||||
"b": 764.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.787,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 764.9,
|
||||
"r_x1": 521.99,
|
||||
"r_y1": 764.9,
|
||||
"r_x2": 521.99,
|
||||
"r_y2": 744.09,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 744.09,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 739.2,
|
||||
"r_x1": 523.21,
|
||||
"r_y1": 739.2,
|
||||
"r_x2": 523.21,
|
||||
"r_y2": 717.17,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 717.17,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.26,
|
||||
"r_y0": 710.03,
|
||||
"r_x1": 522.03,
|
||||
"r_y1": 710.03,
|
||||
"r_x2": 522.03,
|
||||
"r_y2": 690.04,
|
||||
"r_x3": 441.26,
|
||||
"r_y3": 690.04,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.24,
|
||||
"t": 690.04,
|
||||
"r": 523.21,
|
||||
"b": 764.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.787,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 764.9,
|
||||
"r_x1": 521.99,
|
||||
"r_y1": 764.9,
|
||||
"r_x2": 521.99,
|
||||
"r_y2": 744.09,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 744.09,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.24,
|
||||
"r_y0": 739.2,
|
||||
"r_x1": 523.21,
|
||||
"r_y1": 739.2,
|
||||
"r_x2": 523.21,
|
||||
"r_y2": 717.17,
|
||||
"r_x3": 89.24,
|
||||
"r_y3": 717.17,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.26,
|
||||
"r_y0": 710.03,
|
||||
"r_x1": 522.03,
|
||||
"r_y1": 710.03,
|
||||
"r_x2": 522.03,
|
||||
"r_y2": 690.04,
|
||||
"r_x3": 441.26,
|
||||
"r_y3": 690.04,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
},
|
||||
"parsed_page": null
|
||||
}
|
||||
]
|
||||
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.meta.json
vendored
Normal file
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_180.pages.meta.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"num_cells": 0
|
||||
}
|
||||
]
|
||||
@@ -1,311 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.92,
|
||||
"height": 595.2
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.24,
|
||||
"t": 70.9,
|
||||
"r": 764.9,
|
||||
"b": 504.87,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.636,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.09,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 764.9,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 764.9,
|
||||
"r_y2": 73.35,
|
||||
"r_x3": 744.09,
|
||||
"r_y3": 73.35,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.17,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 737.97,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 737.97,
|
||||
"r_y2": 70.9,
|
||||
"r_x3": 717.17,
|
||||
"r_y3": 70.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.24,
|
||||
"r_y0": 152.81,
|
||||
"r_x1": 709.83,
|
||||
"r_y1": 152.81,
|
||||
"r_x2": 709.83,
|
||||
"r_y2": 72.12,
|
||||
"r_x3": 690.24,
|
||||
"r_y3": 72.12,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.24,
|
||||
"t": 70.9,
|
||||
"r": 764.9,
|
||||
"b": 504.87,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.636,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.09,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 764.9,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 764.9,
|
||||
"r_y2": 73.35,
|
||||
"r_x3": 744.09,
|
||||
"r_y3": 73.35,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.17,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 737.97,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 737.97,
|
||||
"r_y2": 70.9,
|
||||
"r_x3": 717.17,
|
||||
"r_y3": 70.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.24,
|
||||
"r_y0": 152.81,
|
||||
"r_x1": 709.83,
|
||||
"r_y1": 152.81,
|
||||
"r_x2": 709.83,
|
||||
"r_y2": 72.12,
|
||||
"r_x3": 690.24,
|
||||
"r_y3": 72.12,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 690.24,
|
||||
"t": 70.9,
|
||||
"r": 764.9,
|
||||
"b": 504.87,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.636,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.09,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 764.9,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 764.9,
|
||||
"r_y2": 73.35,
|
||||
"r_x3": 744.09,
|
||||
"r_y3": 73.35,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.17,
|
||||
"r_y0": 504.87,
|
||||
"r_x1": 737.97,
|
||||
"r_y1": 504.87,
|
||||
"r_x2": 737.97,
|
||||
"r_y2": 70.9,
|
||||
"r_x3": 717.17,
|
||||
"r_y3": 70.9,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 690.24,
|
||||
"r_y0": 152.81,
|
||||
"r_x1": 709.83,
|
||||
"r_y1": 152.81,
|
||||
"r_x2": 709.83,
|
||||
"r_y2": 72.12,
|
||||
"r_x3": 690.24,
|
||||
"r_y3": 72.12,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
},
|
||||
"parsed_page": null
|
||||
}
|
||||
]
|
||||
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.meta.json
vendored
Normal file
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_270.pages.meta.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"num_cells": 0
|
||||
}
|
||||
]
|
||||
@@ -1,368 +0,0 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.92,
|
||||
"height": 595.2
|
||||
},
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 77.1,
|
||||
"t": 89.13,
|
||||
"r": 126.08,
|
||||
"b": 523.32,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.601,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.1,
|
||||
"r_y0": 520.76,
|
||||
"r_x1": 96.68,
|
||||
"r_y1": 520.76,
|
||||
"r_x2": 96.68,
|
||||
"r_y2": 89.24,
|
||||
"r_x3": 77.1,
|
||||
"r_y3": 89.24,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64,
|
||||
"r_y0": 523.32,
|
||||
"r_x1": 126.08,
|
||||
"r_y1": 523.32,
|
||||
"r_x2": 126.08,
|
||||
"r_y2": 89.13,
|
||||
"r_x3": 100.64,
|
||||
"r_y3": 89.13,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21,
|
||||
"t": 441.01,
|
||||
"r": 152.2,
|
||||
"b": 521.08,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21,
|
||||
"r_y0": 521.08,
|
||||
"r_x1": 152.2,
|
||||
"r_y1": 521.08,
|
||||
"r_x2": 152.2,
|
||||
"r_y2": 441.01,
|
||||
"r_x3": 131.21,
|
||||
"r_y3": 441.01,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 77.1,
|
||||
"t": 89.13,
|
||||
"r": 126.08,
|
||||
"b": 523.32,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.601,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.1,
|
||||
"r_y0": 520.76,
|
||||
"r_x1": 96.68,
|
||||
"r_y1": 520.76,
|
||||
"r_x2": 96.68,
|
||||
"r_y2": 89.24,
|
||||
"r_x3": 77.1,
|
||||
"r_y3": 89.24,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64,
|
||||
"r_y0": 523.32,
|
||||
"r_x1": 126.08,
|
||||
"r_y1": 523.32,
|
||||
"r_x2": 126.08,
|
||||
"r_y2": 89.13,
|
||||
"r_x3": 100.64,
|
||||
"r_y3": 89.13,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 11,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 11,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21,
|
||||
"t": 441.01,
|
||||
"r": 152.2,
|
||||
"b": 521.08,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21,
|
||||
"r_y0": 521.08,
|
||||
"r_x1": 152.2,
|
||||
"r_y1": 521.08,
|
||||
"r_x2": 152.2,
|
||||
"r_y2": 441.01,
|
||||
"r_x3": 131.21,
|
||||
"r_y3": 441.01,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 77.1,
|
||||
"t": 89.13,
|
||||
"r": 126.08,
|
||||
"b": 523.32,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.601,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.1,
|
||||
"r_y0": 520.76,
|
||||
"r_x1": 96.68,
|
||||
"r_y1": 520.76,
|
||||
"r_x2": 96.68,
|
||||
"r_y2": 89.24,
|
||||
"r_x3": 77.1,
|
||||
"r_y3": 89.24,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.64,
|
||||
"r_y0": 523.32,
|
||||
"r_x1": 126.08,
|
||||
"r_y1": 523.32,
|
||||
"r_x2": 126.08,
|
||||
"r_y2": 89.13,
|
||||
"r_x3": 100.64,
|
||||
"r_y3": 89.13,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 11,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 11,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21,
|
||||
"t": 441.01,
|
||||
"r": 152.2,
|
||||
"b": 521.08,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21,
|
||||
"r_y0": 521.08,
|
||||
"r_x1": 152.2,
|
||||
"r_y1": 521.08,
|
||||
"r_x2": 152.2,
|
||||
"r_y2": 441.01,
|
||||
"r_x3": 131.21,
|
||||
"r_y3": 441.01,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
},
|
||||
"parsed_page": null
|
||||
}
|
||||
]
|
||||
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.meta.json
vendored
Normal file
5
tests/data_scanned/groundtruth/docling_v2/ocr_test_rotated_90.pages.meta.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
[
|
||||
{
|
||||
"num_cells": 0
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user