[Feature] Add OnnxTR as possible OCR engine

Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
felix 2025-03-21 22:17:51 +01:00
parent a19cf81f98
commit cfc42458ae
3 changed files with 42 additions and 27 deletions

View File

@ -157,16 +157,23 @@ class OnnxtrOcrOptions(OcrOptions):
kind: ClassVar[Literal["onnxtr"]] = "onnxtr"
lang: List[str] = ["en", "fr"]
confidence_score: float = 0.5
# word confidence threshold for the recognition model
confidence_score: float = 0.7
# detection model objectness score threshold 'fast algorithm'
objectness_score: float = 0.3
# NOTE: This can also be a Hugging Face Hub model
det_arch: str = "fast_base"
reco_arch: str = "crnn_vgg16_bn" # NOTE: This can be also a hf hub model
reco_arch: str = "crnn_vgg16_bn"
reco_bs: int = 512
auto_correct_orientation: bool = False
preserve_aspect_ratio: bool = True
symmetric_pad: bool = True
paragraph_break: float = 0.035
load_in_8_bit: bool = False
# Ref.: https://onnxruntime.ai/docs/api/python/api_summary.html
providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None
session_options: Any = None
model_config = ConfigDict(
extra="forbid",

View File

@ -50,7 +50,7 @@ class OnnxtrOcrModel(BaseOcrModel):
ocr_predictor,
)
# We diable multiprocessing for OnnxTR,
# We disable multiprocessing for OnnxTR,
# because the speedup is minimal and it can cause memory leaks on Windows
os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE"
except ImportError:
@ -72,12 +72,16 @@ class OnnxtrOcrModel(BaseOcrModel):
config = {
"assume_straight_pages": True,
"straighten_pages": False,
# This should be disabled when docling supports polygons
"export_as_straight_boxes": True,
"export_as_straight_boxes": False,
"disable_crop_orientation": False,
"disable_page_orientation": False,
}
engine_cfg = EngineConfig(
providers=self.options.providers,
session_options=self.options.session_options,
)
self.reader = ocr_predictor(
det_arch=(
from_hub(self.options.det_arch)
@ -95,10 +99,9 @@ class OnnxtrOcrModel(BaseOcrModel):
paragraph_break=self.options.paragraph_break,
load_in_8_bit=self.options.load_in_8_bit,
**config,
# TODO: Allow specification of the engine configs in the options
det_engine_cfg=None,
reco_engine_cfg=None,
clf_engine_cfg=None,
det_engine_cfg=engine_cfg,
reco_engine_cfg=engine_cfg,
clf_engine_cfg=engine_cfg,
)
def _to_absolute_and_docling_format(
@ -170,24 +173,29 @@ class OnnxtrOcrModel(BaseOcrModel):
for line in block.lines
for word in line.words
):
all_ocr_cells.append(
TextCell(
index=ix,
text=word.value,
orig=word.value,
from_ocr=True,
confidence=word.confidence,
rect=BoundingRectangle.from_bounding_box(
BoundingBox.from_tuple(
self._to_absolute_and_docling_format(
word.geometry,
img_shape=(im_height, im_width),
),
origin=CoordOrigin.TOPLEFT,
)
),
if (
word.confidence >= self.options.confidence_score
and word.objectness_score
>= self.options.objectness_score
):
all_ocr_cells.append(
TextCell(
index=ix,
text=word.value,
orig=word.value,
from_ocr=True,
confidence=word.confidence,
rect=BoundingRectangle.from_bounding_box(
BoundingBox.from_tuple(
self._to_absolute_and_docling_format(
word.geometry,
img_shape=(im_height, im_width),
),
origin=CoordOrigin.TOPLEFT,
)
),
)
)
)
# Post-process the cells
page.cells = self.post_process_cells(all_ocr_cells, page.cells)

View File

@ -14,7 +14,7 @@ def main():
ocr_options = OnnxtrOcrOptions(
det_arch="db_mobilenet_v3_large",
reco_arch="Felix92/onnxtr-parseq-multilingual-v1", # Model will be downloaded from Hugging Face Hub
auto_correct_orientation=True, # This can be used to correct the orientation of the pages
auto_correct_orientation=False, # This can be set to `True` to auto-correct the orientation of the pages
)
pipeline_options = PdfPipelineOptions(