diff --git a/docling/cli/main.py b/docling/cli/main.py index 87a93d15..a2e6962e 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -342,11 +342,13 @@ def convert( else: raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}") - format_options: Dict[InputFormat, FormatOption] = { - InputFormat.PDF: PdfFormatOption( + pdf_format_option = PdfFormatOption( pipeline_options=pipeline_options, backend=backend, # pdf_backend ) + format_options: Dict[InputFormat, FormatOption] = { + InputFormat.PDF: pdf_format_option, + InputFormat.IMAGE: pdf_format_option, } doc_converter = DocumentConverter( allowed_formats=from_formats, diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 63e0d3c6..13560132 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -143,7 +143,7 @@ class PdfPipelineOptions(PipelineOptions): table_structure_options: TableStructureOptions = TableStructureOptions() ocr_options: Union[ - EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions + EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions, RapidOcrOptions ] = Field(EasyOcrOptions(), discriminator="kind") images_scale: float = 1.0