feat(ocr): Add OnnxTR as a possible OCR engine

Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
felix 2025-03-25 08:00:47 +01:00
parent c28907ed9c
commit 98496fafcc
3 changed files with 9 additions and 2 deletions

View File

@@ -14,6 +14,9 @@ runs:
with:
python-version: ${{ inputs.python-version }}
cache: 'poetry'
- name: Update lockfile
run: poetry update
shell: bash
- name: Install dependencies
run: poetry install --all-extras
shell: bash

View File

@@ -44,7 +44,7 @@ packages = [{ include = "docling" }]
######################
# actual dependencies:
######################
python = "^3.10"
python = "^3.9"
pydantic = "^2.0.0"
docling-core = {extras = ["chunking"], version = "^2.24.1"}
docling-ibm-models = "^3.4.0"

View File

@@ -63,7 +63,6 @@ def test_e2e_conversions():
TesseractOcrOptions(),
TesseractCliOcrOptions(),
EasyOcrOptions(force_full_page_ocr=True),
OnnxtrOcrOptions(force_full_page_ocr=True),
TesseractOcrOptions(force_full_page_ocr=True),
TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]),
TesseractCliOcrOptions(force_full_page_ocr=True),
@@ -75,6 +74,11 @@ def test_e2e_conversions():
engines.append(RapidOcrOptions())
engines.append(RapidOcrOptions(force_full_page_ocr=True))
# onnxtr is only available for Python >=3.10
if sys.version_info >= (3, 10):
engines.append(OnnxtrOcrOptions())
engines.append(OnnxtrOcrOptions(force_full_page_ocr=True))
# only works on mac
if "darwin" == sys.platform:
engines.append(OcrMacOptions())