From c23916488bd9169c3f80bcad29c73b1dac3ff566 Mon Sep 17 00:00:00 2001
From: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
Date: Wed, 16 Oct 2024 22:12:34 +0200
Subject: [PATCH] docs: update chunking docs, rename page

[skip ci]

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
---
 docs/use_docling.md | 27 ++++++++++++++++++---------
 mkdocs.yml          |  2 +-
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/docs/use_docling.md b/docs/use_docling.md
index 733af19d..84e0df0d 100644
--- a/docs/use_docling.md
+++ b/docs/use_docling.md
@@ -139,14 +139,23 @@ You can perform a hierarchy-aware chunking of a Docling document as follows:
 from docling.document_converter import DocumentConverter
 from docling_core.transforms.chunker import HierarchicalChunker
 
-doc = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062").legacy_document
+conv_res = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062")
+doc = conv_res.document
 chunks = list(HierarchicalChunker().chunk(doc))
-print(chunks[0])
-# ChunkWithMetadata(
-#     path='#/main-text/1',
-#     text='DocLayNet: A Large Human-Annotated Dataset [...]',
-#     page=1,
-#     bbox=[107.30, 672.38, 505.19, 709.08],
-#     [...]
-# )
+
+print(chunks[30])
+# {
+#   "text": "Lately, new types of ML models for document-layout analysis have emerged [...]",
+#   "meta": {
+#     "doc_items": [{
+#       "self_ref": "#/texts/40",
+#       "label": "text",
+#       "prov": [{
+#         "page_no": 2,
+#         "bbox": {"l": 317.06, "t": 325.81, "r": 559.18, "b": 239.97, ...},
+#       }]
+#     }],
+#     "headings": ["2 RELATED WORK"],
+#   }
+# }
 ```
diff --git a/mkdocs.yml b/mkdocs.yml
index 5fd180a4..f844b075 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -54,7 +54,7 @@ nav:
   - Get started:
     - Home: index.md
     - Installation: installation.md
-    - Use Docling: use_docling.md
+    - Usage: use_docling.md
     - Docling v2: v2.md
   - Concepts:
     - The Docling Document format: concepts/docling_format.md