Properly propagate image data per page, together with the predicted tags, in the VLM pipeline. This enables correct figure extraction and correct page numbers in provenances.

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
2025-12-08 20:58:11 +00:00 · 2025-01-13 15:21:19 +01:00
parent 01c46e24b1
commit 61bb9dbba2
2 changed files with 229 additions and 218 deletions
--- a/docs/examples/minimal_smol_docling.py
+++ b/docs/examples/minimal_smol_docling.py
@@ -6,10 +6,11 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
 from docling.pipeline.vlm_pipeline import VlmPipeline

-# source = "https://arxiv.org/pdf/2408.09869"  # document per local path or URL
+source = "https://arxiv.org/pdf/2408.09869"  # document per local path or URL
 # source = "tests/data/2305.03393v1-pg9-img.png"
-source = "tests/data/2305.03393v1-pg9.pdf"
-# source = "page.png"
+# source = "tests/data/2305.03393v1-pg9.pdf"
+# source = "demo_data/page.png"
+# source = "demo_data/original_tables.pdf"

 pipeline_options = PdfPipelineOptions()
 pipeline_options.generate_page_images = True