Added capability for vlm_pipeline to grab text from a preconfigured backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak
2025-01-16 10:44:49 +01:00
parent e0929781f4
commit 0dc3ac43b1
2 changed files with 76 additions and 44 deletions

View File

@@ -12,15 +12,9 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
# source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
# source = "tests/data/2305.03393v1-pg9-img.png"
# source = "tests/data/2305.03393v1-pg9.pdf"
# source = "demo_data/page.png"
# source = "demo_data/original_tables.pdf"
sources = [
"tests/data/2305.03393v1-pg9-img.png",
# "tests/data/2305.03393v1-pg9.pdf",
# "tests/data/2305.03393v1-pg9-img.png",
"tests/data/2305.03393v1-pg9.pdf",
# "demo_data/page.png",
# "demo_data/original_tables.pdf",
]