expose easyocr arguments

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-07 15:17:40 +02:00
parent 99dfbf6107
commit 9eb3afc16c
2 changed files with 9 additions and 2 deletions

View File

@ -1,5 +1,5 @@
from enum import Enum, auto
from typing import List, Literal, Union
from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field
@ -26,6 +26,9 @@ class OcrOptions(BaseModel):
class EasyOcrOptions(OcrOptions):
    # Settings for the EasyOCR backend. ``kind`` is fixed to the literal
    # "easyocr"; the remaining fields carry the values handed to
    # ``easyocr.Reader`` when the OCR model is constructed.

    kind: Literal["easyocr"] = "easyocr"

    # Language codes the reader is initialised with.
    lang: List[str] = ["fr", "de", "es", "en"]

    # The two boolean flags below use the same defaults as easyocr.Reader.
    # NOTE(review): confirm that ``use_gpu`` is actually forwarded to the
    # reader (EasyOCR names the argument ``gpu``) -- TODO verify at the call site.
    use_gpu: bool = True

    # Directory for the EasyOCR model files; ``None`` is passed through
    # unchanged, leaving the choice of location to EasyOCR.
    model_storage_directory: Optional[str] = None

    # Passed through as ``download_enabled`` to easyocr.Reader.
    download_enabled: bool = True
class TesseractOcrOptions(OcrOptions):

View File

@ -26,7 +26,11 @@ class EasyOcrModel(BaseOcrModel):
"Alternatively, Docling has support for other OCR engines. See the documentation."
)
self.reader = easyocr.Reader(lang_list=self.options.lang)
self.reader = easyocr.Reader(
lang_list=self.options.lang,
model_storage_directory=self.options.model_storage_directory,
download_enabled=self.options.download_enabled,
)
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: