docling/docling/utils/ocr_utils.py
Nikos Livathinos 4c2552efc5 feat: Introduce automatic language detection in tesseract_ocr_cli model. Extend unit tests.
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
2025-01-24 11:20:21 +01:00

10 lines
263 B
Python

def map_tesseract_script(script: str) -> str:
r""" """
if script == "Katakana" or script == "Hiragana":
script = "Japanese"
elif script == "Han":
script = "HanS"
elif script == "Korean":
script = "Hangul"
return script