fix: Ensure proper image_scale for generated page images in VLM pipelines (#2728)

* fix: Ensure proper image_scale is used for generated page images in layout+vlm pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* fix: Ensure proper image_scale output in default VLM pipeline

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-12-05 13:16:11 +01:00
committed by GitHub
parent d007ba0e6f
commit 609069d12c
3 changed files with 8 additions and 2 deletions

View File

@@ -232,9 +232,12 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
         # Initialize pages
         start_page, end_page = conv_res.input.limits.page_range
         pages: List[Page] = []
+        images_scale = self.pipeline_options.images_scale
         for i in range(conv_res.input.page_count):
             if start_page - 1 <= i <= end_page - 1:
                 page = Page(page_no=i)
+                if images_scale is not None:
+                    page._default_image_scale = images_scale
                 page._backend = backend.load_page(i)
                 if page._backend and page._backend.is_valid():
                     page.size = page._backend.get_size()

View File

@@ -114,6 +114,9 @@ class VlmPipeline(PaginatedPipeline):
     def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
         with TimeRecorder(conv_res, "page_init"):
+            images_scale = self.pipeline_options.images_scale
+            if images_scale is not None:
+                page._default_image_scale = images_scale
             page._backend = conv_res.input._backend.load_page(page.page_no)  # type: ignore
             if page._backend is not None and page._backend.is_valid():
                 page.size = page._backend.get_size()

View File

@@ -113,7 +113,7 @@ def demo_threaded_layout_vlm_pipeline(
         # Queue configuration
         queue_max_size=10,
         # Image processing
-        images_scale=2.0,
+        images_scale=vlm_options.scale,
         generate_page_images=True,
         enable_remote_services=use_api_vlm,
     )
@@ -142,7 +142,7 @@ def demo_threaded_layout_vlm_pipeline(
     )
     result_layout_aware.document.save_as_html(
-        out_dir_layout_aware / f"{doc_filename}.html"
+        out_dir_layout_aware / f"{doc_filename}.html", split_page_view=True
     )
     for page in result_layout_aware.pages:
         _log.info("Page %s of VLM response:", page.page_no)