diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index c9b4a9df..0c915bbf 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -1,5 +1,5 @@
 from enum import Enum, auto
-from typing import List, Literal, Union
+from typing import List, Literal, Optional, Union
 
 from pydantic import BaseModel, Field
 
@@ -26,6 +26,9 @@ class OcrOptions(BaseModel):
 class EasyOcrOptions(OcrOptions):
     kind: Literal["easyocr"] = "easyocr"
     lang: List[str] = ["fr", "de", "es", "en"]
+    use_gpu: bool = True  # same default as easyocr.Reader
+    model_storage_directory: Optional[str] = None  # None: easyocr picks its default dir
+    download_enabled: bool = True  # same default as easyocr.Reader
 
 
 class TesseractOcrOptions(OcrOptions):
diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py
index fef0958d..c36c6657 100644
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@@ -26,7 +26,12 @@ class EasyOcrModel(BaseOcrModel):
                     "Alternatively, Docling has support for other OCR engines. See the documentation."
                 )
 
-            self.reader = easyocr.Reader(lang_list=self.options.lang)
+            self.reader = easyocr.Reader(
+                lang_list=self.options.lang,
+                gpu=self.options.use_gpu,  # else Reader ignores the option
+                model_storage_directory=self.options.model_storage_directory,
+                download_enabled=self.options.download_enabled,
+            )
 
     def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
 