{ "schema_name": "DoclingDocument", "version": "1.8.0", "name": "html_code_snippets", "origin": { "mimetype": "text/html", "binary_hash": 8468578485215893920, "filename": "html_code_snippets.html" }, "furniture": { "self_ref": "#/furniture", "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified" }, "body": { "self_ref": "#/body", "children": [ { "$ref": "#/texts/0" }, { "$ref": "#/texts/1" } ], "content_layer": "body", "name": "_root_", "label": "unspecified" }, "groups": [ { "self_ref": "#/groups/0", "parent": { "$ref": "#/texts/1" }, "children": [ { "$ref": "#/texts/2" }, { "$ref": "#/texts/3" }, { "$ref": "#/texts/4" }, { "$ref": "#/texts/5" }, { "$ref": "#/texts/6" }, { "$ref": "#/texts/7" }, { "$ref": "#/texts/8" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/1", "parent": { "$ref": "#/texts/1" }, "children": [ { "$ref": "#/texts/9" }, { "$ref": "#/texts/10" }, { "$ref": "#/texts/11" }, { "$ref": "#/texts/12" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/2", "parent": { "$ref": "#/texts/1" }, "children": [ { "$ref": "#/texts/13" }, { "$ref": "#/texts/14" }, { "$ref": "#/texts/15" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/3", "parent": { "$ref": "#/texts/1" }, "children": [ { "$ref": "#/texts/17" }, { "$ref": "#/texts/18" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/4", "parent": { "$ref": "#/texts/1" }, "children": [ { "$ref": "#/texts/20" }, { "$ref": "#/texts/24" }, { "$ref": "#/texts/28" } ], "content_layer": "body", "name": "list", "label": "list" }, { "self_ref": "#/groups/5", "parent": { "$ref": "#/texts/20" }, "children": [ { "$ref": "#/texts/21" }, { "$ref": "#/texts/22" }, { "$ref": "#/texts/23" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/6", "parent": { "$ref": "#/texts/24" }, "children": [ { "$ref": "#/texts/25" }, { "$ref": "#/texts/26" }, { "$ref": "#/texts/27" } ], "content_layer": "body", "name": "group", "label": "inline" }, { "self_ref": "#/groups/7", "parent": { "$ref": "#/texts/28" }, "children": [ { "$ref": "#/texts/29" }, { "$ref": "#/texts/30" } ], "content_layer": "body", "name": "group", "label": "inline" } ], "texts": [ { "self_ref": "#/texts/0", "parent": { "$ref": "#/body" }, "children": [], "content_layer": "furniture", "label": "title", "prov": [], "orig": "Code snippets in HTML", "text": "Code snippets in HTML" }, { "self_ref": "#/texts/1", "parent": { "$ref": "#/body" }, "children": [ { "$ref": "#/groups/0" }, { "$ref": "#/groups/1" }, { "$ref": "#/groups/2" }, { "$ref": "#/texts/16" }, { "$ref": "#/groups/3" }, { "$ref": "#/texts/19" }, { "$ref": "#/groups/4" } ], "content_layer": "body", "label": "title", "prov": [], "orig": "Code snippets", "text": "Code snippets" }, { "self_ref": "#/texts/2", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "The Pythagorean theorem can be written as an equation relating the lengths of the sides", "text": "The Pythagorean theorem can be written as an equation relating the lengths of the sides" }, { "self_ref": "#/texts/3", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "a", "text": "a", "formatting": { "bold": false, "italic": true, "underline": false, "strikethrough": false, "script": "baseline" } }, { "self_ref": "#/texts/4", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": ",", "text": "," }, { "self_ref": "#/texts/5", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "b", "text": "b", "formatting": { "bold": false, "italic": true, "underline": false, "strikethrough": false, "script": "baseline" } }, { "self_ref": "#/texts/6", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "and the hypotenuse", "text": "and the hypotenuse" }, { "self_ref": "#/texts/7", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "c", "text": "c", "formatting": { "bold": false, "italic": true, "underline": false, "strikethrough": false, "script": "baseline" } }, { "self_ref": "#/texts/8", "parent": { "$ref": "#/groups/0" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": ".", "text": "." }, { "self_ref": "#/texts/9", "parent": { "$ref": "#/groups/1" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "To use Docling, simply install", "text": "To use Docling, simply install" }, { "self_ref": "#/texts/10", "parent": { "$ref": "#/groups/1" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "docling", "text": "docling", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/11", "parent": { "$ref": "#/groups/1" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "from your package manager, e.g. pip:", "text": "from your package manager, e.g. pip:" }, { "self_ref": "#/texts/12", "parent": { "$ref": "#/groups/1" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "pip install docling", "text": "pip install docling", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/13", "parent": { "$ref": "#/groups/2" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "To convert individual documents with python, use", "text": "To convert individual documents with python, use" }, { "self_ref": "#/texts/14", "parent": { "$ref": "#/groups/2" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "convert()", "text": "convert()", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/15", "parent": { "$ref": "#/groups/2" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": ", for example:", "text": ", for example:" }, { "self_ref": "#/texts/16", "parent": { "$ref": "#/texts/1" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "from docling.document_converter import DocumentConverter\n\nsource = \"https://arxiv.org/pdf/2408.09869\"\nconverter = DocumentConverter()\nresult = converter.convert(source)\nprint(result.document.export_to_markdown())", "text": "from docling.document_converter import DocumentConverter\n\nsource = \"https://arxiv.org/pdf/2408.09869\"\nconverter = DocumentConverter()\nresult = converter.convert(source)\nprint(result.document.export_to_markdown())", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/17", "parent": { "$ref": "#/groups/3" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "The program will output:", "text": "The program will output:" }, { "self_ref": "#/texts/18", "parent": { "$ref": "#/groups/3" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "## Docling Technical Report[...]", "text": "## Docling Technical Report[...]", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/19", "parent": { "$ref": "#/texts/1" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "Prefetch the models:", "text": "Prefetch the models:" }, { "self_ref": "#/texts/20", "parent": { "$ref": "#/groups/4" }, "children": [ { "$ref": "#/groups/5" } ], "content_layer": "body", "label": "list_item", "prov": [], "orig": "", "text": "", "enumerated": false, "marker": "" }, { "self_ref": "#/texts/21", "parent": { "$ref": "#/groups/5" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "Use the", "text": "Use the" }, { "self_ref": "#/texts/22", "parent": { "$ref": "#/groups/5" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "docling-tools models download", "text": "docling-tools models download", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/23", "parent": { "$ref": "#/groups/5" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "utility:", "text": "utility:" }, { "self_ref": "#/texts/24", "parent": { "$ref": "#/groups/4" }, "children": [ { "$ref": "#/groups/6" } ], "content_layer": "body", "label": "list_item", "prov": [], "orig": "", "text": "", "enumerated": false, "marker": "" }, { "self_ref": "#/texts/25", "parent": { "$ref": "#/groups/6" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "Alternatively, models can be programmatically downloaded using", "text": "Alternatively, models can be programmatically downloaded using" }, { "self_ref": "#/texts/26", "parent": { "$ref": "#/groups/6" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "docling.utils.model_downloader.download_models()", "text": "docling.utils.model_downloader.download_models()", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" }, { "self_ref": "#/texts/27", "parent": { "$ref": "#/groups/6" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": ".", "text": "." }, { "self_ref": "#/texts/28", "parent": { "$ref": "#/groups/4" }, "children": [ { "$ref": "#/groups/7" } ], "content_layer": "body", "label": "list_item", "prov": [], "orig": "", "text": "", "enumerated": false, "marker": "" }, { "self_ref": "#/texts/29", "parent": { "$ref": "#/groups/7" }, "children": [], "content_layer": "body", "label": "text", "prov": [], "orig": "Also, you can use download-hf-repo parameter to download arbitrary models from HuggingFace by specifying repo id:", "text": "Also, you can use download-hf-repo parameter to download arbitrary models from HuggingFace by specifying repo id:" }, { "self_ref": "#/texts/30", "parent": { "$ref": "#/groups/7" }, "children": [], "content_layer": "body", "label": "code", "prov": [], "orig": "$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...", "text": "$ docling-tools models download-hf-repo ds4sd/SmolDocling-256M-preview Downloading ds4sd/SmolDocling-256M-preview model from HuggingFace...", "captions": [], "references": [], "footnotes": [], "code_language": "unknown" } ], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {} }