mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat(ocr): auto-detect rotated pages in Tesseract (#1167)
* fix(ocr): tesseract support mis-oriented documents Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * fix(ocr): update missing test data Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * fix(ocr): rotate image to the natural orientation before layout prediction Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * fix(ocr): move bounding bow rotation util to orientation.py Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * fix(ocr): refactor rotation utilities Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * chore(ocr): revert layout updates Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * chore(ocr): update e2e OCR test data Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * fix(ocr): avoid to swallow tesseract errors causing orientation detection failures Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * chore(ocr): revert layout updates Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com> * chore(ocr): update e2e OCR test data * chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrCliModel` * chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrModel` * chore(ocr): default `TesseractOcrCliModel._is_auto` to `False` * fix(ocr): fix `TesseractOcrCliModel._is_auto` computation * chore(ocr): improve logging in case of OSD failure in `TesseractOcrCliModel` and `TesseractOcrModel` --------- Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
@@ -44,9 +44,9 @@
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
69.6796630536824,
|
||||
689.0124221922704,
|
||||
504.8720051760782,
|
||||
70.90211866351085,
|
||||
689.216658542347,
|
||||
504.8720079864275,
|
||||
764.9216921155637
|
||||
],
|
||||
"page": 1,
|
||||
|
||||
@@ -40,14 +40,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -65,14 +65,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -90,13 +90,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -132,14 +132,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -157,14 +157,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -195,13 +195,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -237,14 +237,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -262,14 +262,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -293,13 +293,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -335,14 +335,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -360,14 +360,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
<document>
|
||||
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph>
|
||||
</document>
|
||||
@@ -0,0 +1 @@
|
||||
{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [131.21306574279092, 74.12495603322407, 152.19606490864376, 154.19400205373182], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
|
||||
@@ -0,0 +1 @@
|
||||
package
|
||||
@@ -0,0 +1 @@
|
||||
[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]
|
||||
@@ -0,0 +1,4 @@
|
||||
<document>
|
||||
<paragraph><location><page_1><loc_74><loc_16><loc_88><loc_18></location>package</paragraph>
|
||||
<paragraph><location><page_1><loc_15><loc_9><loc_88><loc_15></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</paragraph>
|
||||
</document>
|
||||
@@ -0,0 +1,106 @@
|
||||
{
|
||||
"_name": "",
|
||||
"type": "pdf-document",
|
||||
"description": {
|
||||
"title": null,
|
||||
"abstract": null,
|
||||
"authors": null,
|
||||
"affiliations": null,
|
||||
"subjects": null,
|
||||
"keywords": null,
|
||||
"publication_date": null,
|
||||
"languages": null,
|
||||
"license": null,
|
||||
"publishers": null,
|
||||
"url_refs": null,
|
||||
"references": null,
|
||||
"publication": null,
|
||||
"reference_count": null,
|
||||
"citation_count": null,
|
||||
"citation_date": null,
|
||||
"advanced": null,
|
||||
"analytics": null,
|
||||
"logs": [],
|
||||
"collection": null,
|
||||
"acquisition": null
|
||||
},
|
||||
"file-info": {
|
||||
"filename": "ocr_test_rotated_180.pdf",
|
||||
"filename-prov": null,
|
||||
"document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982",
|
||||
"#-pages": 1,
|
||||
"collection-name": null,
|
||||
"description": null,
|
||||
"page-hashes": [
|
||||
{
|
||||
"hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49",
|
||||
"model": "default",
|
||||
"page": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"main-text": [
|
||||
{
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
441.304584329099,
|
||||
132.09610360960653,
|
||||
521.9863114205704,
|
||||
151.67751306395223
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
0,
|
||||
7
|
||||
],
|
||||
"__ref_s3_data": null
|
||||
}
|
||||
],
|
||||
"text": "package",
|
||||
"type": "paragraph",
|
||||
"payload": null,
|
||||
"name": "Text",
|
||||
"font": null
|
||||
},
|
||||
{
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
89.12133215549848,
|
||||
77.02339849621205,
|
||||
523.3501733013318,
|
||||
124.86176457554109
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
0,
|
||||
86
|
||||
],
|
||||
"__ref_s3_data": null
|
||||
}
|
||||
],
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"type": "paragraph",
|
||||
"payload": null,
|
||||
"name": "Text",
|
||||
"font": null
|
||||
}
|
||||
],
|
||||
"figures": [],
|
||||
"tables": [],
|
||||
"bitmaps": null,
|
||||
"equations": [],
|
||||
"footnotes": [],
|
||||
"page-dimensions": [
|
||||
{
|
||||
"height": 841.9216918945312,
|
||||
"page": 1,
|
||||
"width": 595.201171875
|
||||
}
|
||||
],
|
||||
"page-footers": [],
|
||||
"page-headers": [],
|
||||
"_s3_data": null,
|
||||
"identifiers": null
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
package
|
||||
|
||||
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
|
||||
@@ -0,0 +1,445 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,3 @@
|
||||
<document>
|
||||
<paragraph><location><page_1><loc_82><loc_74><loc_84><loc_88></location>package</paragraph>
|
||||
</document>
|
||||
@@ -0,0 +1,83 @@
|
||||
{
|
||||
"_name": "",
|
||||
"type": "pdf-document",
|
||||
"description": {
|
||||
"title": null,
|
||||
"abstract": null,
|
||||
"authors": null,
|
||||
"affiliations": null,
|
||||
"subjects": null,
|
||||
"keywords": null,
|
||||
"publication_date": null,
|
||||
"languages": null,
|
||||
"license": null,
|
||||
"publishers": null,
|
||||
"url_refs": null,
|
||||
"references": null,
|
||||
"publication": null,
|
||||
"reference_count": null,
|
||||
"citation_count": null,
|
||||
"citation_date": null,
|
||||
"advanced": null,
|
||||
"analytics": null,
|
||||
"logs": [],
|
||||
"collection": null,
|
||||
"acquisition": null
|
||||
},
|
||||
"file-info": {
|
||||
"filename": "ocr_test_rotated_270.pdf",
|
||||
"filename-prov": null,
|
||||
"document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14",
|
||||
"#-pages": 1,
|
||||
"collection-name": null,
|
||||
"description": null,
|
||||
"page-hashes": [
|
||||
{
|
||||
"hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304",
|
||||
"model": "default",
|
||||
"page": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"main-text": [
|
||||
{
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
691.4680194659409,
|
||||
442.3948768148814,
|
||||
709.8255850278712,
|
||||
523.0765988200898
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
0,
|
||||
7
|
||||
],
|
||||
"__ref_s3_data": null
|
||||
}
|
||||
],
|
||||
"text": "package",
|
||||
"type": "paragraph",
|
||||
"payload": null,
|
||||
"name": "Text",
|
||||
"font": null
|
||||
}
|
||||
],
|
||||
"figures": [],
|
||||
"tables": [],
|
||||
"bitmaps": null,
|
||||
"equations": [],
|
||||
"footnotes": [],
|
||||
"page-dimensions": [
|
||||
{
|
||||
"height": 595.201171875,
|
||||
"page": 1,
|
||||
"width": 841.9216918945312
|
||||
}
|
||||
],
|
||||
"page-footers": [],
|
||||
"page-headers": [],
|
||||
"_s3_data": null,
|
||||
"identifiers": null
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package
|
||||
@@ -0,0 +1,446 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,3 @@
|
||||
<document>
|
||||
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph>
|
||||
</document>
|
||||
@@ -0,0 +1,83 @@
|
||||
{
|
||||
"_name": "",
|
||||
"type": "pdf-document",
|
||||
"description": {
|
||||
"title": null,
|
||||
"abstract": null,
|
||||
"authors": null,
|
||||
"affiliations": null,
|
||||
"subjects": null,
|
||||
"keywords": null,
|
||||
"publication_date": null,
|
||||
"languages": null,
|
||||
"license": null,
|
||||
"publishers": null,
|
||||
"url_refs": null,
|
||||
"references": null,
|
||||
"publication": null,
|
||||
"reference_count": null,
|
||||
"citation_count": null,
|
||||
"citation_date": null,
|
||||
"advanced": null,
|
||||
"analytics": null,
|
||||
"logs": [],
|
||||
"collection": null,
|
||||
"acquisition": null
|
||||
},
|
||||
"file-info": {
|
||||
"filename": "ocr_test_rotated_90.pdf",
|
||||
"filename-prov": null,
|
||||
"document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6",
|
||||
"#-pages": 1,
|
||||
"collection-name": null,
|
||||
"description": null,
|
||||
"page-hashes": [
|
||||
{
|
||||
"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3",
|
||||
"model": "default",
|
||||
"page": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"main-text": [
|
||||
{
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
131.21306574279092,
|
||||
74.12495603322407,
|
||||
152.19606490864376,
|
||||
154.19400205373182
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
0,
|
||||
7
|
||||
],
|
||||
"__ref_s3_data": null
|
||||
}
|
||||
],
|
||||
"text": "package",
|
||||
"type": "paragraph",
|
||||
"payload": null,
|
||||
"name": "Text",
|
||||
"font": null
|
||||
}
|
||||
],
|
||||
"figures": [],
|
||||
"tables": [],
|
||||
"bitmaps": null,
|
||||
"equations": [],
|
||||
"footnotes": [],
|
||||
"page-dimensions": [
|
||||
{
|
||||
"height": 595.201171875,
|
||||
"page": 1,
|
||||
"width": 841.9216918945312
|
||||
}
|
||||
],
|
||||
"page-footers": [],
|
||||
"page-headers": [],
|
||||
"_s3_data": null,
|
||||
"identifiers": null
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package
|
||||
@@ -0,0 +1,446 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1,2 +1,2 @@
|
||||
<doctag><text><loc_59><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
</doctag>
|
||||
@@ -42,10 +42,10 @@
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 764.9216921155637,
|
||||
"r": 504.8720051760782,
|
||||
"b": 689.0124221922704,
|
||||
"r": 504.8720079864275,
|
||||
"b": 689.216658542347,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
|
||||
@@ -40,14 +40,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -65,14 +65,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -90,13 +90,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -132,14 +132,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -157,14 +157,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -195,13 +195,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -237,14 +237,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -262,14 +262,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@@ -293,13 +293,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 69.6796630536824,
|
||||
"l": 70.90211866351085,
|
||||
"t": 76.99999977896756,
|
||||
"r": 504.8720051760782,
|
||||
"b": 152.90926970226084,
|
||||
"r": 504.8720079864275,
|
||||
"b": 152.70503335218433,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9715732336044312,
|
||||
"confidence": 0.9715733528137207,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@@ -335,14 +335,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 69.6796630536824,
|
||||
"r_y0": 124.83139494707741,
|
||||
"r_x1": 504.8720051760782,
|
||||
"r_y1": 124.83139494707741,
|
||||
"r_x2": 504.8720051760782,
|
||||
"r_y2": 104.00000011573796,
|
||||
"r_x3": 69.6796630536824,
|
||||
"r_y3": 104.00000011573796,
|
||||
"r_x0": 70.90211866351085,
|
||||
"r_y0": 124.83139551297342,
|
||||
"r_x1": 504.8720079864275,
|
||||
"r_y1": 124.83139551297342,
|
||||
"r_x2": 504.8720079864275,
|
||||
"r_y2": 102.66666671251768,
|
||||
"r_x3": 70.90211866351085,
|
||||
"r_y3": 102.66666671251768,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@@ -360,14 +360,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 71.84193505100733,
|
||||
"r_y0": 152.90926970226084,
|
||||
"r_x1": 153.088934155825,
|
||||
"r_y1": 152.90926970226084,
|
||||
"r_x2": 153.088934155825,
|
||||
"r_y2": 129.797125232046,
|
||||
"r_x3": 71.84193505100733,
|
||||
"r_y3": 129.797125232046,
|
||||
"r_x0": 73.10852522817731,
|
||||
"r_y0": 152.70503335218433,
|
||||
"r_x1": 153.04479435252625,
|
||||
"r_y1": 152.70503335218433,
|
||||
"r_x2": 153.04479435252625,
|
||||
"r_y2": 130.00136157890958,
|
||||
"r_x3": 73.10852522817731,
|
||||
"r_y3": 130.00136157890958,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
<doctag><text><loc_371><loc_410><loc_438><loc_422>package</text>
|
||||
<text><loc_75><loc_426><loc_440><loc_454>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</text>
|
||||
</doctag>
|
||||
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.3.0",
|
||||
"name": "ocr_test_rotated_180",
|
||||
"origin": {
|
||||
"mimetype": "application/pdf",
|
||||
"binary_hash": 2530576989861832966,
|
||||
"filename": "ocr_test_rotated_180.pdf",
|
||||
"uri": null
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"parent": null,
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"cref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"cref": "#/texts/1"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 151.67751306395223,
|
||||
"r": 521.9863114205704,
|
||||
"b": 132.09610360960653,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
7
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "package",
|
||||
"text": "package",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 124.86176457554109,
|
||||
"r": 523.3501733013318,
|
||||
"b": 77.02339849621205,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
86
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {
|
||||
"1": {
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"image": null,
|
||||
"page_no": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
package
|
||||
|
||||
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained
|
||||
@@ -0,0 +1,445 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 595.201171875,
|
||||
"height": 841.9216918945312
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 89.12133215549848,
|
||||
"t": 717.0599273189902,
|
||||
"r": 523.3501733013318,
|
||||
"b": 764.8982933983192,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.7318570613861084,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 90.46133071208328,
|
||||
"r_y0": 764.8982933983192,
|
||||
"r_x1": 520.7638616365624,
|
||||
"r_y1": 764.8982933983192,
|
||||
"r_x2": 520.7638616365624,
|
||||
"r_y2": 744.0929853742306,
|
||||
"r_x3": 90.46133071208328,
|
||||
"r_y3": 744.0929853742306,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 89.12133215549848,
|
||||
"r_y0": 741.5247710689902,
|
||||
"r_x1": 523.3501733013318,
|
||||
"r_y1": 741.5247710689902,
|
||||
"r_x2": 523.3501733013318,
|
||||
"r_y2": 717.0599273189902,
|
||||
"r_x3": 89.12133215549848,
|
||||
"r_y3": 717.0599273189902,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 2,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 2,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 441.304584329099,
|
||||
"t": 690.244178830579,
|
||||
"r": 521.9863114205704,
|
||||
"b": 709.8255882849247,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5982133150100708,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 441.304584329099,
|
||||
"r_y0": 709.8255882849247,
|
||||
"r_x1": 521.9863114205704,
|
||||
"r_y1": 709.8255882849247,
|
||||
"r_x2": 521.9863114205704,
|
||||
"r_y2": 690.244178830579,
|
||||
"r_x3": 441.304584329099,
|
||||
"r_y3": 690.244178830579,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": []
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,3 @@
|
||||
<doctag><page_header><loc_426><loc_60><loc_454><loc_424>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
|
||||
<text><loc_411><loc_61><loc_422><loc_128>package</text>
|
||||
</doctag>
|
||||
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.3.0",
|
||||
"name": "ocr_test_rotated_270",
|
||||
"origin": {
|
||||
"mimetype": "application/pdf",
|
||||
"binary_hash": 10890858393843077593,
|
||||
"filename": "ocr_test_rotated_270.pdf",
|
||||
"uri": null
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"parent": null,
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"cref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"cref": "#/texts/1"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"label": "page_header",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 524.2990548540179,
|
||||
"r": 764.8982839673505,
|
||||
"b": 90.32916553110118,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
86
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 523.0765988200898,
|
||||
"r": 709.8255850278712,
|
||||
"b": 442.3948768148814,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
7
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "package",
|
||||
"text": "package",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {
|
||||
"1": {
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"image": null,
|
||||
"page_no": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package
|
||||
@@ -0,0 +1,446 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 8,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 8,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 691.4680194659409,
|
||||
"t": 72.12457305491027,
|
||||
"r": 709.8255850278712,
|
||||
"b": 152.80629506011857,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 1.0,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 691.4680194659409,
|
||||
"r_y0": 152.80629506011857,
|
||||
"r_x1": 709.8255850278712,
|
||||
"r_y1": 152.80629506011857,
|
||||
"r_x2": 709.8255850278712,
|
||||
"r_y2": 72.12457305491027,
|
||||
"r_x3": 691.4680194659409,
|
||||
"r_y3": 72.12457305491027,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 717.1685859527342,
|
||||
"t": 70.90211702098213,
|
||||
"r": 764.8982839673505,
|
||||
"b": 504.8720063438988,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6915205121040344,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 744.0930045534915,
|
||||
"r_y0": 504.87200373583954,
|
||||
"r_x1": 764.8982839673505,
|
||||
"r_y1": 504.87200373583954,
|
||||
"r_x2": 764.8982839673505,
|
||||
"r_y2": 73.34702001188118,
|
||||
"r_x3": 744.0930045534915,
|
||||
"r_y3": 73.34702001188118,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 717.1685859527342,
|
||||
"r_y0": 504.8720063438988,
|
||||
"r_x1": 737.9738558298501,
|
||||
"r_y1": 504.8720063438988,
|
||||
"r_x2": 737.9738558298501,
|
||||
"r_y2": 70.90211702098213,
|
||||
"r_x3": 717.1685859527342,
|
||||
"r_y3": 70.90211702098213,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,3 @@
|
||||
<doctag><page_header><loc_46><loc_75><loc_74><loc_440>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
|
||||
<text><loc_78><loc_370><loc_90><loc_438>package</text>
|
||||
</doctag>
|
||||
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.3.0",
|
||||
"name": "ocr_test_rotated_90",
|
||||
"origin": {
|
||||
"mimetype": "application/pdf",
|
||||
"binary_hash": 6989291015361162334,
|
||||
"filename": "ocr_test_rotated_90.pdf",
|
||||
"uri": null
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"parent": null,
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"cref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"cref": "#/texts/1"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"label": "page_header",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 506.07735421856773,
|
||||
"r": 124.91101654503161,
|
||||
"b": 71.88562244773436,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
86
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"cref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 154.19400205373182,
|
||||
"r": 152.19606490864376,
|
||||
"b": 74.12495603322407,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
0,
|
||||
7
|
||||
]
|
||||
}
|
||||
],
|
||||
"orig": "package",
|
||||
"text": "package",
|
||||
"formatting": null,
|
||||
"hyperlink": null
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {
|
||||
"1": {
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"image": null,
|
||||
"page_no": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
package
|
||||
@@ -0,0 +1,446 @@
|
||||
[
|
||||
{
|
||||
"page_no": 0,
|
||||
"size": {
|
||||
"width": 841.9216918945312,
|
||||
"height": 595.201171875
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"parsed_page": null,
|
||||
"predictions": {
|
||||
"layout": {
|
||||
"clusters": [
|
||||
{
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"tablestructure": {
|
||||
"table_map": {}
|
||||
},
|
||||
"figures_classification": null,
|
||||
"equations_prediction": null,
|
||||
"vlm_response": null
|
||||
},
|
||||
"assembled": {
|
||||
"elements": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
},
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"body": [
|
||||
{
|
||||
"label": "text",
|
||||
"id": 1,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 1,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 131.21306574279092,
|
||||
"t": 441.0071698212682,
|
||||
"r": 152.19606490864376,
|
||||
"b": 521.0762158417759,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.5234212875366211,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 131.21306574279092,
|
||||
"r_y0": 521.0762158417759,
|
||||
"r_x1": 152.19606490864376,
|
||||
"r_y1": 521.0762158417759,
|
||||
"r_x2": 152.19606490864376,
|
||||
"r_y2": 441.0071698212682,
|
||||
"r_x3": 131.21306574279092,
|
||||
"r_y3": 441.0071698212682,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
"orig": "package",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "package"
|
||||
}
|
||||
],
|
||||
"headers": [
|
||||
{
|
||||
"label": "page_header",
|
||||
"id": 0,
|
||||
"page_no": 0,
|
||||
"cluster": {
|
||||
"id": 0,
|
||||
"label": "page_header",
|
||||
"bbox": {
|
||||
"l": 77.10171546422428,
|
||||
"t": 89.12381765643227,
|
||||
"r": 124.91101654503161,
|
||||
"b": 523.3155494272656,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.6016772389411926,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 77.10171546422428,
|
||||
"r_y0": 520.7638577050515,
|
||||
"r_x1": 96.6831586150625,
|
||||
"r_y1": 520.7638577050515,
|
||||
"r_x2": 96.6831586150625,
|
||||
"r_y2": 89.23887398109309,
|
||||
"r_x3": 77.10171546422428,
|
||||
"r_y3": 89.23887398109309,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to",
|
||||
"orig": "Docling bundles PDF document conversion to",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
},
|
||||
{
|
||||
"index": 1,
|
||||
"rgba": {
|
||||
"r": 0,
|
||||
"g": 0,
|
||||
"b": 0,
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 100.55299576256091,
|
||||
"r_y0": 523.3155494272656,
|
||||
"r_x1": 124.91101654503161,
|
||||
"r_y1": 523.3155494272656,
|
||||
"r_x2": 124.91101654503161,
|
||||
"r_y2": 89.12381765643227,
|
||||
"r_x3": 100.55299576256091,
|
||||
"r_y3": 89.12381765643227,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
"orig": "JSON and Markdown in an easy self contained",
|
||||
"text_direction": "left_to_right",
|
||||
"confidence": 1.0,
|
||||
"from_ocr": true
|
||||
}
|
||||
],
|
||||
"children": []
|
||||
},
|
||||
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user