diff --git a/docling/cli/main.py b/docling/cli/main.py index 39201ffe..54724383 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -26,11 +26,11 @@ from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, OcrOptions, + PaddleOcrOptions, PdfPipelineOptions, TableFormerMode, TesseractCliOcrOptions, TesseractOcrOptions, - PaddleOcrOptions ) from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption @@ -264,7 +264,7 @@ def convert( ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr) elif ocr_engine == OcrEngine.OCRMAC: ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr) - elif ocr_engine == OcrEngine.PADDLEOCR: + elif ocr_engine == OcrEngine.PADDLEOCR: ocr_options = PaddleOcrOptions(force_full_page_ocr=force_ocr) else: raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 4e8617c7..80724c41 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -1,6 +1,6 @@ from enum import Enum from pathlib import Path -from typing import List, Literal, Optional, Union, Annotated +from typing import Annotated, List, Literal, Optional, Union from pydantic import BaseModel, ConfigDict, Field @@ -41,11 +41,12 @@ class EasyOcrOptions(OcrOptions): protected_namespaces=(), ) + class PaddleOcrOptions(OcrOptions): kind: Literal["paddleocr"] = "paddleocr" lang: Annotated[ list[str], - Field(min_items=1, max_items=1) # Limits the list length to 0 or 1 items + Field(min_items=1, max_items=1), # Limits the list to exactly 1 item (PaddleOCR takes a single language) ] = ["en"] use_gpu: bool = True # same default as paddleocr.ocr use_angle_cls: bool = True @@ -102,7 +103,11 @@ class PdfPipelineOptions(PipelineOptions): table_structure_options: TableStructureOptions = TableStructureOptions() ocr_options: Union[ - EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, PaddleOcrOptions, 
OcrMacOptions + EasyOcrOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, + PaddleOcrOptions, + OcrMacOptions, ] = Field(EasyOcrOptions(), discriminator="kind") images_scale: float = 1.0 diff --git a/docling/models/paddle_ocr_model.py b/docling/models/paddle_ocr_model.py index 5547a6c0..19a209a6 100644 --- a/docling/models/paddle_ocr_model.py +++ b/docling/models/paddle_ocr_model.py @@ -23,7 +23,7 @@ class PaddleOcrModel(BaseOcrModel): if self.enabled: try: - from paddleocr import PaddleOCR, draw_ocr + from paddleocr import PaddleOCR, draw_ocr # type: ignore except ImportError: raise ImportError( "PaddleOCR is not installed. Please install it via `pip install paddlepaddle` and `pip install paddleocr` to use this OCR engine. " @@ -33,7 +33,7 @@ class PaddleOcrModel(BaseOcrModel): self.reader = PaddleOCR( lang=self.options.lang[0], use_gpu=self.options.use_gpu, - use_angle_cls=self.options.use_angle_cls, + use_angle_cls=self.options.use_angle_cls, show_log=self.options.show_log, ) diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 2908d0ee..916228cc 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -12,17 +12,17 @@ from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, + PaddleOcrOptions, PdfPipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, - PaddleOcrOptions, ) from docling.models.base_ocr_model import BaseOcrModel from docling.models.ds_glm_model import GlmModel, GlmOptions from docling.models.easyocr_model import EasyOcrModel -from docling.models.paddle_ocr_model import PaddleOcrModel from docling.models.layout_model import LayoutModel from docling.models.ocr_mac_model import OcrMacModel +from docling.models.paddle_ocr_model import PaddleOcrModel from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions from 
docling.models.page_preprocessing_model import ( PagePreprocessingModel, diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 251617a3..40702f3d 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -5,10 +5,10 @@ from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, + PaddleOcrOptions, PdfPipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, - PaddleOcrOptions ) from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index 99cfb26b..d1e34fcf 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -9,10 +9,10 @@ from docling.datamodel.pipeline_options import ( EasyOcrOptions, OcrMacOptions, OcrOptions, + PaddleOcrOptions, PdfPipelineOptions, TesseractCliOcrOptions, TesseractOcrOptions, - PaddleOcrOptions ) from docling.document_converter import DocumentConverter, PdfFormatOption @@ -57,7 +57,7 @@ def test_e2e_conversions(): EasyOcrOptions(), TesseractOcrOptions(), TesseractCliOcrOptions(), - PaddleOcrOptions(), + PaddleOcrOptions(), EasyOcrOptions(force_full_page_ocr=True), TesseractOcrOptions(force_full_page_ocr=True), TesseractCliOcrOptions(force_full_page_ocr=True),