expose easyocr arguments

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-07 15:17:40 +02:00
parent 99dfbf6107
commit 9eb3afc16c
2 changed files with 9 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
from enum import Enum, auto from enum import Enum, auto
from typing import List, Literal, Union from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@@ -26,6 +26,9 @@ class OcrOptions(BaseModel):
class EasyOcrOptions(OcrOptions): class EasyOcrOptions(OcrOptions):
kind: Literal["easyocr"] = "easyocr" kind: Literal["easyocr"] = "easyocr"
lang: List[str] = ["fr", "de", "es", "en"] lang: List[str] = ["fr", "de", "es", "en"]
use_gpu: bool = True # same default as easyocr.Reader
model_storage_directory: Optional[str] = None
download_enabled: bool = True # same default as easyocr.Reader
class TesseractOcrOptions(OcrOptions): class TesseractOcrOptions(OcrOptions):

View File

@@ -26,7 +26,11 @@ class EasyOcrModel(BaseOcrModel):
"Alternatively, Docling has support for other OCR engines. See the documentation." "Alternatively, Docling has support for other OCR engines. See the documentation."
) )
self.reader = easyocr.Reader(lang_list=self.options.lang) self.reader = easyocr.Reader(
lang_list=self.options.lang,
model_storage_directory=self.options.model_storage_directory,
download_enabled=self.options.download_enabled,
)
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: