mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-29 21:44:32 +00:00
feat(ocr): Add OnnxTR as possible OCR engine
Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
parent
c28907ed9c
commit
98496fafcc
3
.github/actions/setup-poetry/action.yml
vendored
3
.github/actions/setup-poetry/action.yml
vendored
@ -14,6 +14,9 @@ runs:
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
cache: 'poetry'
|
||||
- name: Update lockfile
|
||||
run: poetry update
|
||||
shell: bash
|
||||
- name: Install dependencies
|
||||
run: poetry install --all-extras
|
||||
shell: bash
|
||||
|
@ -44,7 +44,7 @@ packages = [{ include = "docling" }]
|
||||
######################
|
||||
# actual dependencies:
|
||||
######################
|
||||
python = "^3.10"
|
||||
python = "^3.9"
|
||||
pydantic = "^2.0.0"
|
||||
docling-core = {extras = ["chunking"], version = "^2.24.1"}
|
||||
docling-ibm-models = "^3.4.0"
|
||||
|
@ -63,7 +63,6 @@ def test_e2e_conversions():
|
||||
TesseractOcrOptions(),
|
||||
TesseractCliOcrOptions(),
|
||||
EasyOcrOptions(force_full_page_ocr=True),
|
||||
OnnxtrOcrOptions(force_full_page_ocr=True),
|
||||
TesseractOcrOptions(force_full_page_ocr=True),
|
||||
TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]),
|
||||
TesseractCliOcrOptions(force_full_page_ocr=True),
|
||||
@ -75,6 +74,11 @@ def test_e2e_conversions():
|
||||
engines.append(RapidOcrOptions())
|
||||
engines.append(RapidOcrOptions(force_full_page_ocr=True))
|
||||
|
||||
# onnxtr is only available for Python >=3.10
|
||||
if sys.version_info >= (3, 10):
|
||||
engines.append(OnnxtrOcrOptions())
|
||||
engines.append(OnnxtrOcrOptions(force_full_page_ocr=True))
|
||||
|
||||
# only works on mac
|
||||
if "darwin" == sys.platform:
|
||||
engines.append(OcrMacOptions())
|
||||
|
Loading…
Reference in New Issue
Block a user