diff --git a/docs/use_docling.md b/docs/use_docling.md index 733af19d..84e0df0d 100644 --- a/docs/use_docling.md +++ b/docs/use_docling.md @@ -139,14 +139,23 @@ You can perform a hierarchy-aware chunking of a Docling document as follows: from docling.document_converter import DocumentConverter from docling_core.transforms.chunker import HierarchicalChunker -doc = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062").legacy_document +conv_res = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062") +doc = conv_res.document chunks = list(HierarchicalChunker().chunk(doc)) -print(chunks[0]) -# ChunkWithMetadata( -# path='#/main-text/1', -# text='DocLayNet: A Large Human-Annotated Dataset [...]', -# page=1, -# bbox=[107.30, 672.38, 505.19, 709.08], -# [...] -# ) + +print(chunks[30]) +# { +# "text": "Lately, new types of ML models for document-layout analysis have emerged [...]", +# "meta": { +# "doc_items": [{ +# "self_ref": "#/texts/40", +# "label": "text", +# "prov": [{ +# "page_no": 2, +# "bbox": {"l": 317.06, "t": 325.81, "r": 559.18, "b": 239.97, ...}, +# }] +# }], +# "headings": ["2 RELATED WORK"], +# } +# } ``` diff --git a/mkdocs.yml b/mkdocs.yml index 5fd180a4..f844b075 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -54,7 +54,7 @@ nav: - Get started: - Home: index.md - Installation: installation.md - - Use Docling: use_docling.md + - Usage: use_docling.md - Docling v2: v2.md - Concepts: - The Docling Document format: concepts/docling_format.md