mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 03:55:00 +00:00
* updated the cli to output html in split-page mode Signed-off-by: Peter Staar <taa@zurich.ibm.com> * add pin for new docling-core with html split argument Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * relock with fixed html export in docling-core Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update test results Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update more tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update example Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update lock with docling-core fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update test results Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add again chunking extras Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Peter Staar <taa@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
1 line
6.8 KiB
JSON
1 line
6.8 KiB
JSON
[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 73.34702132031646, "r_y0": 97.99999977896755, "r_x1": 503.64955224479564, "r_y1": 97.99999977896755, "r_x2": 503.64955224479564, "r_y2": 76.99999977896756, "r_x3": 73.34702132031646, "r_y3": 76.99999977896756, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.6796630536824, "r_y0": 124.83139494707741, "r_x1": 504.8720051760782, "r_y1": 124.83139494707741, "r_x2": 504.8720051760782, "r_y2": 104.00000011573796, "r_x3": 69.6796630536824, "r_y3": 104.00000011573796, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.84193505100733, "r_y0": 152.90926970226084, "r_x1": 153.088934155825, "r_y1": 152.90926970226084, "r_x2": 153.088934155825, "r_y2": 129.797125232046, "r_x3": 71.84193505100733, "r_y3": 129.797125232046, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 73.34702132031646, "r_y0": 97.99999977896755, "r_x1": 503.64955224479564, "r_y1": 97.99999977896755, "r_x2": 503.64955224479564, "r_y2": 76.99999977896756, "r_x3": 73.34702132031646, "r_y3": 76.99999977896756, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.6796630536824, "r_y0": 124.83139494707741, "r_x1": 504.8720051760782, "r_y1": 124.83139494707741, "r_x2": 504.8720051760782, "r_y2": 104.00000011573796, "r_x3": 69.6796630536824, "r_y3": 104.00000011573796, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.84193505100733, "r_y0": 152.90926970226084, "r_x1": 153.088934155825, "r_y1": 152.90926970226084, "r_x2": 153.088934155825, "r_y2": 129.797125232046, "r_x3": 71.84193505100733, "r_y3": 129.797125232046, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 73.34702132031646, "r_y0": 97.99999977896755, "r_x1": 503.64955224479564, "r_y1": 97.99999977896755, "r_x2": 503.64955224479564, "r_y2": 76.99999977896756, "r_x3": 73.34702132031646, "r_y3": 76.99999977896756, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.6796630536824, "r_y0": 124.83139494707741, "r_x1": 504.8720051760782, "r_y1": 124.83139494707741, "r_x2": 504.8720051760782, "r_y2": 104.00000011573796, "r_x3": 69.6796630536824, "r_y3": 104.00000011573796, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.84193505100733, "r_y0": 152.90926970226084, "r_x1": 153.088934155825, "r_y1": 152.90926970226084, "r_x2": 153.088934155825, "r_y2": 129.797125232046, "r_x3": 71.84193505100733, "r_y3": 129.797125232046, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.99999977896756, "r": 504.8720051760782, "b": 152.90926970226084, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 73.34702132031646, "r_y0": 97.99999977896755, "r_x1": 503.64955224479564, "r_y1": 97.99999977896755, "r_x2": 503.64955224479564, "r_y2": 76.99999977896756, "r_x3": 73.34702132031646, "r_y3": 76.99999977896756, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.6796630536824, "r_y0": 124.83139494707741, "r_x1": 504.8720051760782, "r_y1": 124.83139494707741, "r_x2": 504.8720051760782, "r_y2": 104.00000011573796, "r_x3": 69.6796630536824, "r_y3": 104.00000011573796, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.84193505100733, "r_y0": 152.90926970226084, "r_x1": 153.088934155825, "r_y1": 152.90926970226084, "r_x2": 153.088934155825, "r_y2": 129.797125232046, "r_x3": 71.84193505100733, "r_y3": 129.797125232046, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 1.0, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}] |