mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
Properly propagating image data per page, together with predicted tags, in the VLM pipeline. This enables correct figure extraction and correct page numbers in provenances.
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
@@ -6,10 +6,11 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling.pipeline.vlm_pipeline import VlmPipeline
|
||||
|
||||
# source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
|
||||
source = "https://arxiv.org/pdf/2408.09869" # document per local path or URL
|
||||
# source = "tests/data/2305.03393v1-pg9-img.png"
|
||||
source = "tests/data/2305.03393v1-pg9.pdf"
|
||||
# source = "page.png"
|
||||
# source = "tests/data/2305.03393v1-pg9.pdf"
|
||||
# source = "demo_data/page.png"
|
||||
# source = "demo_data/original_tables.pdf"
|
||||
|
||||
pipeline_options = PdfPipelineOptions()
|
||||
pipeline_options.generate_page_images = True
|
||||
|
||||
Reference in New Issue
Block a user