mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
feat(ocr): Add OnnxTR as possible OCR engine
Signed-off-by: felix <felixdittrich92@gmail.com>
This commit is contained in:
parent
c28907ed9c
commit
98496fafcc
3
.github/actions/setup-poetry/action.yml
vendored
3
.github/actions/setup-poetry/action.yml
vendored
@@ -14,6 +14,9 @@ runs:
|
|||||||
with:
|
with:
|
||||||
python-version: ${{ inputs.python-version }}
|
python-version: ${{ inputs.python-version }}
|
||||||
cache: 'poetry'
|
cache: 'poetry'
|
||||||
|
- name: Update lockfile
|
||||||
|
run: poetry update
|
||||||
|
shell: bash
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: poetry install --all-extras
|
run: poetry install --all-extras
|
||||||
shell: bash
|
shell: bash
|
||||||
|
@@ -44,7 +44,7 @@ packages = [{ include = "docling" }]
|
|||||||
######################
|
######################
|
||||||
# actual dependencies:
|
# actual dependencies:
|
||||||
######################
|
######################
|
||||||
python = "^3.10"
|
python = "^3.9"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = {extras = ["chunking"], version = "^2.24.1"}
|
docling-core = {extras = ["chunking"], version = "^2.24.1"}
|
||||||
docling-ibm-models = "^3.4.0"
|
docling-ibm-models = "^3.4.0"
|
||||||
|
@@ -63,7 +63,6 @@ def test_e2e_conversions():
|
|||||||
TesseractOcrOptions(),
|
TesseractOcrOptions(),
|
||||||
TesseractCliOcrOptions(),
|
TesseractCliOcrOptions(),
|
||||||
EasyOcrOptions(force_full_page_ocr=True),
|
EasyOcrOptions(force_full_page_ocr=True),
|
||||||
OnnxtrOcrOptions(force_full_page_ocr=True),
|
|
||||||
TesseractOcrOptions(force_full_page_ocr=True),
|
TesseractOcrOptions(force_full_page_ocr=True),
|
||||||
TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]),
|
TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]),
|
||||||
TesseractCliOcrOptions(force_full_page_ocr=True),
|
TesseractCliOcrOptions(force_full_page_ocr=True),
|
||||||
@@ -75,6 +74,11 @@ def test_e2e_conversions():
|
|||||||
engines.append(RapidOcrOptions())
|
engines.append(RapidOcrOptions())
|
||||||
engines.append(RapidOcrOptions(force_full_page_ocr=True))
|
engines.append(RapidOcrOptions(force_full_page_ocr=True))
|
||||||
|
|
||||||
|
# onnxtr is only available for Python >=3.10
|
||||||
|
if sys.version_info >= (3, 10):
|
||||||
|
engines.append(OnnxtrOcrOptions())
|
||||||
|
engines.append(OnnxtrOcrOptions(force_full_page_ocr=True))
|
||||||
|
|
||||||
# only works on mac
|
# only works on mac
|
||||||
if "darwin" == sys.platform:
|
if "darwin" == sys.platform:
|
||||||
engines.append(OcrMacOptions())
|
engines.append(OcrMacOptions())
|
||||||
|
Loading…
Reference in New Issue
Block a user