Added capability for vlm_pipeline to grab text from a preconfigured backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
2025-12-08 20:58:11 +00:00 · 2025-01-16 10:44:49 +01:00
parent e0929781f4
commit 0dc3ac43b1
2 changed files with 76 additions and 44 deletions
--- a/docs/examples/minimal_smol_docling.py
+++ b/docs/examples/minimal_smol_docling.py
@@ -12,15 +12,9 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.vlm_pipeline import VlmPipeline

-# source = "https://arxiv.org/pdf/2408.09869"  # document per local path or URL
-# source = "tests/data/2305.03393v1-pg9-img.png"
-# source = "tests/data/2305.03393v1-pg9.pdf"
-# source = "demo_data/page.png"
-# source = "demo_data/original_tables.pdf"
-
 sources = [
-    "tests/data/2305.03393v1-pg9-img.png",
-    # "tests/data/2305.03393v1-pg9.pdf",
+    # "tests/data/2305.03393v1-pg9-img.png",
+    "tests/data/2305.03393v1-pg9.pdf",
    # "demo_data/page.png",
    # "demo_data/original_tables.pdf",
 ]