mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
14 lines
545 B
Python
14 lines
545 B
Python
"""Minimal example: convert a PDF to Markdown using Docling's VlmPipeline."""

from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

source = "https://arxiv.org/pdf/2408.09869"  # document per local path or URL

# Select VlmPipeline for PDF inputs. The format options are passed straight to
# DocumentConverter: the earlier form wrapped a second DocumentConverter in a
# `doc_converter=` keyword, which the constructor does not accept.
converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_cls=VlmPipeline)}
)

# Convert the source and print the resulting document as Markdown.
result = converter.convert(source)
print(result.document.export_to_markdown())
# output: ## Docling Technical Report [...]