feat: add optional ocrmac support

- Added `ocrmac` as an optional dependency in `pyproject.toml` and `poetry.lock`.
- Updated the `[tool.poetry.extras]` section to include `ocrmac`.
- Modified end-to-end OCR conversion tests to support `OcrMacOptions` on macOS.

Signed-off-by: Suhwan Seo <nuridol@gmail.com>
This commit is contained in:
Suhwan Seo 2024-11-20 19:06:41 +09:00
commit c13b128694
3 changed files with 5 additions and 2 deletions

3
poetry.lock generated
View File

@@ -7360,9 +7360,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
type = ["pytest-mypy"]
[extras]
+ocrmac = ["ocrmac"]
tesserocr = ["tesserocr"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "22ce94f59bd5912d1d43d3583318fe4672ee1c97a35d5dbec46fb0a222ea09ff"
+content-hash = "a9912d5333298b629d774f669f4e5eb794333d1694ca7f72f56b9a37e51d5776"

View File

@@ -48,6 +48,7 @@ beautifulsoup4 = "^4.12.3"
pandas = "^2.1.4"
marko = "^2.1.2"
openpyxl = "^3.1.5"
+ocrmac = { version = "^1.0.0", optional = true }
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -92,10 +93,10 @@ torchvision = [
{markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^0"},
{markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~0.17.2"}
]
-ocrmac = {markers = "sys_platform == 'darwin'", version = "^1.0.0", optional = true}
[tool.poetry.extras]
tesserocr = ["tesserocr"]
+ocrmac = ["ocrmac"]
[tool.poetry.scripts]
docling = "docling.cli.main:app"

View File

@@ -63,6 +63,7 @@ def test_e2e_conversions():
# only works on mac
if "darwin" == sys.platform:
engines.append(OcrMacOptions())
engines.append(OcrMacOptions(force_full_page_ocr=True))
for ocr_options in engines: