feat(ocr): Add OnnxTR as possible OCR engine

Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
felix 2025-03-23 10:53:39 +01:00
parent f6560cf662
commit c28907ed9c

View File

@ -3,7 +3,7 @@ import os
import re import re
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, Union
from pydantic import ( from pydantic import (
AnyUrl, AnyUrl,
@ -158,7 +158,7 @@ class OnnxtrOcrOptions(OcrOptions):
lang: List[str] = ["en", "fr"] lang: List[str] = ["en", "fr"]
# word confidence threshold for the recognition model # word confidence threshold for the recognition model
confidence_score: float = 0.7 confidence_score: float = 0.5
# detection model objectness score threshold 'fast algorithm' # detection model objectness score threshold 'fast algorithm'
objectness_score: float = 0.3 objectness_score: float = 0.3
@ -172,8 +172,8 @@ class OnnxtrOcrOptions(OcrOptions):
paragraph_break: float = 0.035 paragraph_break: float = 0.035
load_in_8_bit: bool = False load_in_8_bit: bool = False
# Ref.: https://onnxruntime.ai/docs/api/python/api_summary.html # Ref.: https://onnxruntime.ai/docs/api/python/api_summary.html
providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None providers: Optional[List[Tuple[str, Dict[str, Any]]]] = None
session_options: Any = None session_options: Optional[Any] = None
model_config = ConfigDict( model_config = ConfigDict(
extra="forbid", extra="forbid",