From 58fc6ccf86ded24dca62982badd36cab1a3cf689 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:25:00 +0000 Subject: [PATCH] Fix page_range stopping at page 32 by using dynamic batch_size Co-authored-by: cau-git <60343111+cau-git@users.noreply.github.com> --- docling/experimental/pipeline/threaded_layout_vlm_pipeline.py | 2 +- docling/pipeline/standard_pdf_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py index 92c0c104..7c339651 100644 --- a/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py +++ b/docling/experimental/pipeline/threaded_layout_vlm_pipeline.py @@ -252,7 +252,7 @@ class ThreadedLayoutVlmPipeline(BasePipeline): proc = ProcessingResult(total_expected=total_pages) fed_idx = 0 - batch_size = 32 + batch_size = total_pages try: while proc.success_count + proc.failure_count < total_pages: diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 6c662c0e..27bb6dee 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -545,7 +545,7 @@ class StandardPdfPipeline(ConvertPipeline): proc = ProcessingResult(total_expected=total_pages) fed_idx: int = 0 # number of pages successfully queued - batch_size: int = 32 # drain chunk + batch_size: int = total_pages # drain chunk try: while proc.success_count + proc.failure_count < total_pages: # 1) feed - try to enqueue until the first queue is full