fix: main: Introduce a format option for Image inputs that reuses the PDF format option (same pipeline_options and backend).

Add RapidOcrOptions to the Union of ocr_options for PdfPipelineOptions.

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
2025-08-01 15:02:21 +00:00 · 2024-12-08 18:32:08 +01:00 · 2024-12-08 18:32:08 +01:00 · e125b9b24d
commit e125b9b24d
parent c830b92b2e
2 changed files with 5 additions and 3 deletions
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@ -342,11 +342,13 @@ def convert(
        else:
            raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")

-        format_options: Dict[InputFormat, FormatOption] = {
-            InputFormat.PDF: PdfFormatOption(
+        pdf_format_option = PdfFormatOption(
                pipeline_options=pipeline_options,
                backend=backend,  # pdf_backend
            )
+        format_options: Dict[InputFormat, FormatOption] = {
+            InputFormat.PDF: pdf_format_option,
+            InputFormat.IMAGE: pdf_format_option,
        }
        doc_converter = DocumentConverter(
            allowed_formats=from_formats,
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@ -143,7 +143,7 @@ class PdfPipelineOptions(PipelineOptions):

    table_structure_options: TableStructureOptions = TableStructureOptions()
    ocr_options: Union[
-        EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions
+        EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions, RapidOcrOptions
    ] = Field(EasyOcrOptions(), discriminator="kind")

    images_scale: float = 1.0