mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
updated the README, still need to update the docs
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
211f4f7570
commit
0762986cf9
21
README.md
21
README.md
@ -57,12 +57,31 @@ More [detailed installation instructions](https://ds4sd.github.io/docling/instal
|
|||||||
To convert individual documents, use `convert()`, for example:
|
To convert individual documents, use `convert()`, for example:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
from docling_core.types.doc.base import ImageRefMode
|
||||||
|
from docling_core.types.doc.document import DoclingDocument
|
||||||
|
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
|
source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
|
||||||
converter = DocumentConverter()
|
converter = DocumentConverter()
|
||||||
result = converter.convert(source)
|
result = converter.convert(source)
|
||||||
print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
|
|
||||||
|
doc: DoclingDocument = result.document
|
||||||
|
|
||||||
|
# returning the document in-memory
|
||||||
|
print(doc.export_to_markdown()) # output: "## Docling Technical Report[...]"
|
||||||
|
print(doc.export_to_html()) # output: "<h1>Docling Technical Report[...]"
|
||||||
|
|
||||||
|
# save document to disk, keep only placeholders for images
|
||||||
|
doc.save_as_markdown(filename="output_pl.md", image_mode=ImageRefMode.PLACEHOLDER)
|
||||||
|
|
||||||
|
# save document to disk, embed images in base64
|
||||||
|
doc.save_as_markdown(filename="output_emb.md", image_mode=ImageRefMode.EMBEDDED)
|
||||||
|
doc.save_as_html(filename="output_emb.html", image_mode=ImageRefMode.EMBEDDED)
|
||||||
|
|
||||||
|
# save document to disk, save images in PNG format and reference them in markdown
|
||||||
|
doc.save_as_markdown(filename="output_ref.md", image_mode=ImageRefMode.REFERENCED)
|
||||||
|
doc.save_as_html(filename="output_ref.html", image_mode=ImageRefMode.REFERENCED)
|
||||||
```
|
```
|
||||||
|
|
||||||
Check out [Getting started](https://ds4sd.github.io/docling/).
|
Check out [Getting started](https://ds4sd.github.io/docling/).
|
||||||
|
Loading…
Reference in New Issue
Block a user