From 609069d12c128f30fbf5132f017c9b62aed6a762 Mon Sep 17 00:00:00 2001 From: Christoph Auer <60343111+cau-git@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:16:11 +0100 Subject: [PATCH] fix: Ensure proper image_scale for generated page images in VLM pipelines (#2728) * fix: Ensure proper image_scale is used for generated page images in layout+vlm pipeline Signed-off-by: Christoph Auer * fix: Ensure proper image_scale output in default VLM pipeline Signed-off-by: Christoph Auer --------- Signed-off-by: Christoph Auer --- docling/experimental/pipeline/threaded_layout_vlm_pipeline.py | 3 +++ docling/pipeline/vlm_pipeline.py | 3 +++ docs/examples/demo_layout_vlm.py | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py index 92c0c104..db73db8d 100644 --- a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +++ b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py @@ -232,9 +232,12 @@ class ThreadedLayoutVlmPipeline(BasePipeline): # Initialize pages start_page, end_page = conv_res.input.limits.page_range pages: List[Page] = [] + images_scale = self.pipeline_options.images_scale for i in range(conv_res.input.page_count): if start_page - 1 <= i <= end_page - 1: page = Page(page_no=i) + if images_scale is not None: + page._default_image_scale = images_scale page._backend = backend.load_page(i) if page._backend and page._backend.is_valid(): page.size = page._backend.get_size() diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 6fabdb38..ab919c4d 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -114,6 +114,9 @@ class VlmPipeline(PaginatedPipeline): def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page: with TimeRecorder(conv_res, "page_init"): + images_scale = self.pipeline_options.images_scale + if images_scale is not None: + page._default_image_scale = images_scale page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore if page._backend is not None and page._backend.is_valid(): page.size = page._backend.get_size() diff --git a/docs/examples/demo_layout_vlm.py b/docs/examples/demo_layout_vlm.py index 18eb4aa1..13a5295f 100644 --- a/docs/examples/demo_layout_vlm.py +++ b/docs/examples/demo_layout_vlm.py @@ -113,7 +113,7 @@ def demo_threaded_layout_vlm_pipeline( # Queue configuration queue_max_size=10, # Image processing - images_scale=2.0, + images_scale=vlm_options.scale, generate_page_images=True, enable_remote_services=use_api_vlm, ) @@ -142,7 +142,7 @@ def demo_threaded_layout_vlm_pipeline( ) result_layout_aware.document.save_as_html( - out_dir_layout_aware / f"{doc_filename}.html" + out_dir_layout_aware / f"{doc_filename}.html", split_page_view=True ) for page in result_layout_aware.pages: _log.info("Page %s of VLM response:", page.page_no)