From 9eb3afc16c0d9325e4e61acad001362f85182fba Mon Sep 17 00:00:00 2001
From: Michele Dolfi
Date: Mon, 7 Oct 2024 15:17:40 +0200
Subject: [PATCH] expose easyocr arguments

Add `use_gpu`, `model_storage_directory` and `download_enabled` to
`EasyOcrOptions` and forward all three to `easyocr.Reader`
(`use_gpu` is passed as the reader's `gpu` argument).

Signed-off-by: Michele Dolfi
---
Note: the `gpu=self.options.use_gpu` line was added during review so that
the new `use_gpu` option is actually forwarded to the reader. The hunk
header and diffstat were updated accordingly; the post-image blob id on
the easyocr_model.py `index` line was not regenerated.

 docling/datamodel/pipeline_options.py | 5 ++++-
 docling/models/easyocr_model.py       | 7 ++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index c9b4a9df..0c915bbf 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -1,5 +1,5 @@
 from enum import Enum, auto
-from typing import List, Literal, Union
+from typing import List, Literal, Optional, Union
 
 from pydantic import BaseModel, Field
 
@@ -26,6 +26,9 @@ class OcrOptions(BaseModel):
 class EasyOcrOptions(OcrOptions):
     kind: Literal["easyocr"] = "easyocr"
     lang: List[str] = ["fr", "de", "es", "en"]
+    use_gpu: bool = True  # same default as easyocr.Reader
+    model_storage_directory: Optional[str] = None
+    download_enabled: bool = True  # same default as easyocr.Reader
 
 
 class TesseractOcrOptions(OcrOptions):
diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py
index fef0958d..c36c6657 100644
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@@ -26,7 +26,12 @@ class EasyOcrModel(BaseOcrModel):
                     "Alternatively, Docling has support for other OCR engines. See the documentation."
                 )
 
-            self.reader = easyocr.Reader(lang_list=self.options.lang)
+            self.reader = easyocr.Reader(
+                lang_list=self.options.lang,
+                gpu=self.options.use_gpu,
+                model_storage_directory=self.options.model_storage_directory,
+                download_enabled=self.options.download_enabled,
+            )
 
     def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
 