From d03439ccc54bab43bf4b5c79bb21a1491caeb728 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Wed, 10 Dec 2025 20:43:20 +0100 Subject: [PATCH] docs(gpu): Add benchmarks of standard pipeline with OCR (#2764) * add results for standard + OCR and more Windows timings Signed-off-by: Michele Dolfi * fix runtime selection for py 3.14 in CI Signed-off-by: Michele Dolfi --------- Signed-off-by: Michele Dolfi --- docs/examples/parquet_images.py | 28 ++++++++++++++++++++++++++-- docs/usage/gpu.md | 3 ++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/docs/examples/parquet_images.py b/docs/examples/parquet_images.py index 11dc0402..aa37625b 100644 --- a/docs/examples/parquet_images.py +++ b/docs/examples/parquet_images.py @@ -27,6 +27,7 @@ # %% import io +import sys import time from pathlib import Path from typing import Annotated, Literal @@ -36,6 +37,7 @@ import typer from PIL import Image from docling.datamodel import vlm_model_specs +from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions from docling.datamodel.base_models import ConversionStatus, DocumentStream, InputFormat from docling.datamodel.pipeline_options import ( PdfPipelineOptions, @@ -47,8 +49,10 @@ from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, Response from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter, ImageFormatOption from docling.pipeline.base_pipeline import ConvertPipeline +from docling.pipeline.legacy_standard_pdf_pipeline import LegacyStandardPdfPipeline from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline from docling.pipeline.vlm_pipeline import VlmPipeline +from docling.utils.accelerator_utils import decide_device def process_document( @@ -97,12 +101,32 @@ def run( ), doc_size: int = 192, batch_size: int = 64, - pipeline: Literal["standard", "vlm"] = "standard", + pipeline: Literal["standard", "vlm", "legacy"] = "standard", ): + acc_opts = AcceleratorOptions() + device = decide_device(acc_opts.device) + + ocr_options = RapidOcrOptions() + if "cuda" in device: + ocr_options = RapidOcrOptions(backend="torch") + + # On Python 3.14 we only have torch + if sys.version_info >= (3, 14): + ocr_options = RapidOcrOptions(backend="torch") + if pipeline == "standard": pipeline_cls: type[ConvertPipeline] = StandardPdfPipeline pipeline_options: PipelineOptions = PdfPipelineOptions( - # ocr_options=RapidOcrOptions(backend="openvino"), + ocr_options=ocr_options, + ocr_batch_size=batch_size, + layout_batch_size=batch_size, + table_batch_size=4, + ) + elif pipeline == "legacy": + settings.perf.page_batch_size = batch_size + pipeline_cls: type[ConvertPipeline] = LegacyStandardPdfPipeline + pipeline_options: PipelineOptions = PdfPipelineOptions( + ocr_options=ocr_options, ocr_batch_size=batch_size, layout_batch_size=batch_size, table_batch_size=4, diff --git a/docs/usage/gpu.md b/docs/usage/gpu.md index eda52772..a1c0cab9 100644 --- a/docs/usage/gpu.md +++ b/docs/usage/gpu.md @@ -156,7 +156,8 @@ TBA. Standard - Inline (no OCR)3.1 pages/second-7.9 pages/second
[cpu-only]* 1.5 pages/second-4.2 pages/second
[cpu-only]* 1.2 pages/second- - VLM - Inference server (GraniteDocling)2.4 pages/second-3.8 pages/second3.6-4.5 pages/second-- + Standard - Inline (with OCR)tba1.6 pages/secondtba1.1 pages/second + VLM - Inference server (GraniteDocling)2.4 pages/second-3.8 pages/second3.6-4.5 pages/second2.0 pages/second2.8-3.2 pages/second