mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 03:55:00 +00:00
* updated the cli to output html in split-page mode Signed-off-by: Peter Staar <taa@zurich.ibm.com> * add pin for new docling-core with html split argument Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * relock with fixed html export in docling-core Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update test results Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update more tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update example Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update lock with docling-core fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update test results Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add again chunking extras Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Peter Staar <taa@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
1 line
1.2 KiB
JSON
1 line
1.2 KiB
JSON
{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796630536824, "t": 764.9216921155637, "r": 504.8720051760782, "b": 689.0124221922704, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}} |