docs: simplify LlamaIndex example using Docling extension (#135)

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas
2024-10-09 22:17:56 +02:00
committed by GitHub
parent 6924999f1f
commit 5f1bd9e9c8
4 changed files with 428 additions and 355 deletions

View File

@@ -289,15 +289,14 @@ from docling_core.transforms.chunker import HierarchicalChunker
# Convert the PDF at this URL and keep the `.output` attribute of the conversion result.
doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
# Run the chunker over the converted document and collect everything it produces into a list.
chunks = list(HierarchicalChunker().chunk(doc))
# NOTE(review): two commented sample-output blocks follow — a `# >` list form directly
# below and a single-chunk form after the print call — and they show different `path`
# values ('$.main-text[0]' vs '#/main-text/1'). This looks like both sides of a diff
# rendered without +/- markers; confirm against the raw diff which one is current.
# > [
# > ChunkWithMetadata(
# > path='$.main-text[0]',
# > text='DocLayNet: A Large Human-Annotated Dataset [...]',
# > page=1,
# > bbox=[107.30, 672.38, 505.19, 709.08]
# > ),
# > [...]
# > ]
# Print only the first chunk; the comment block below is presumably its printed form.
print(chunks[0])
# ChunkWithMetadata(
# path='#/main-text/1',
# text='DocLayNet: A Large Human-Annotated Dataset [...]',
# page=1,
# bbox=[107.30, 672.38, 505.19, 709.08],
# [...]
# )
```