mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
docs: simplify LlamaIndex example using Docling extension (#135)
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
17
README.md
17
README.md
@@ -289,15 +289,14 @@ from docling_core.transforms.chunker import HierarchicalChunker
|
||||
|
||||
doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output
|
||||
chunks = list(HierarchicalChunker().chunk(doc))
|
||||
# > [
|
||||
# > ChunkWithMetadata(
|
||||
# > path='$.main-text[0]',
|
||||
# > text='DocLayNet: A Large Human-Annotated Dataset [...]',
|
||||
# > page=1,
|
||||
# > bbox=[107.30, 672.38, 505.19, 709.08]
|
||||
# > ),
|
||||
# > [...]
|
||||
# > ]
|
||||
print(chunks[0])
|
||||
# ChunkWithMetadata(
|
||||
# path='#/main-text/1',
|
||||
# text='DocLayNet: A Large Human-Annotated Dataset [...]',
|
||||
# page=1,
|
||||
# bbox=[107.30, 672.38, 505.19, 709.08],
|
||||
# [...]
|
||||
# )
|
||||
```
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user