Skeleton for SmolDocling model and VLM Pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-01-08 10:16:54 +01:00
committed by Maksym Lysak
parent 1d17e7397a
commit dc3a388aa2
4 changed files with 215 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
# Minimal example: convert a PDF with the VLM pipeline and print it as Markdown.
source = "https://arxiv.org/pdf/2408.09869"  # document given as a local path or URL

# Route PDF input through VlmPipeline. The per-format options are passed
# straight to a single DocumentConverter via `format_options`; nesting a second
# converter under a `doc_converter=` keyword is not a documented constructor
# argument and is expected to fail with a TypeError.
converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_cls=VlmPipeline)}
)

# Run the conversion and dump the resulting document as Markdown on stdout.
result = converter.convert(source)
print(result.document.export_to_markdown())
# output: "## Docling Technical Report [...]"