mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fix: main: Introduce format options for the IMAGE input format, reusing the same PDF pipeline_options.
Add RapidOcrOptions to the Union of ocr_options for PdfPipelineOptions.

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
parent
c830b92b2e
commit
e125b9b24d
@ -342,11 +342,13 @@ def convert(
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
|
||||
|
||||
format_options: Dict[InputFormat, FormatOption] = {
|
||||
InputFormat.PDF: PdfFormatOption(
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend, # pdf_backend
|
||||
)
|
||||
format_options: Dict[InputFormat, FormatOption] = {
|
||||
InputFormat.PDF: pdf_format_option,
|
||||
InputFormat.IMAGE: pdf_format_option,
|
||||
}
|
||||
doc_converter = DocumentConverter(
|
||||
allowed_formats=from_formats,
|
||||
|
@ -143,7 +143,7 @@ class PdfPipelineOptions(PipelineOptions):
|
||||
|
||||
table_structure_options: TableStructureOptions = TableStructureOptions()
|
||||
ocr_options: Union[
|
||||
EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions
|
||||
EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions, RapidOcrOptions
|
||||
] = Field(EasyOcrOptions(), discriminator="kind")
|
||||
|
||||
images_scale: float = 1.0
|
||||
|
Loading…
Reference in New Issue
Block a user