mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
fix: update all test cases
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
04f9963396
commit
c9b0b5aff3
2
poetry.lock
generated
2
poetry.lock
generated
@ -887,7 +887,7 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]
|
||||
type = "git"
|
||||
url = "ssh://git@github.com/DS4SD/docling-core.git"
|
||||
reference = "cau/add-content-layer"
|
||||
resolved_reference = "1609660791cbf83e7ffa3e5e1877f08c0ae986d7"
|
||||
resolved_reference = "4c19ae7f7fb128a12173c50a9ab376114f70a689"
|
||||
|
||||
[[package]]
|
||||
name = "docling-ibm-models"
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
File diff suppressed because one or more lines are too long
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
File diff suppressed because one or more lines are too long
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -10,7 +10,7 @@
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
|
@ -1 +1 @@
|
||||
[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
|
||||
[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
|
@ -1 +1 @@
|
||||
{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
|
||||
{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 764.9216918945312, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
|
@ -1 +1 @@
|
||||
[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
|
||||
[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896756, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573796, "r": 504.8720051760782, "b": 124.83139494707741, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.797125232046, "r": 153.088934155825, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
|
Loading…
Reference in New Issue
Block a user