Properly propagate per-page image data, together with the predicted tags, through the VLM pipeline. This enables correct figure extraction and correct page numbers in provenances.

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak
2025-01-13 15:21:19 +01:00
parent 01c46e24b1
commit 61bb9dbba2
2 changed files with 229 additions and 218 deletions

View File

@@ -6,10 +6,11 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
# source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
# source = "tests/data/2305.03393v1-pg9-img.png"
source = "tests/data/2305.03393v1-pg9.pdf"
# source = "page.png"
# source = "tests/data/2305.03393v1-pg9.pdf"
# source = "demo_data/page.png"
# source = "demo_data/original_tables.pdf"
pipeline_options = PdfPipelineOptions()
pipeline_options.generate_page_images = True