mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
use regex for supporting multiple sep
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
e625f5d87b
commit
2a02ee83de
@@ -1,6 +1,7 @@
|
|||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
@@ -129,10 +130,10 @@ def export_documents(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _comma_split(raw: Optional[str]) -> Optional[List[str]]:
|
def _split_list(raw: Optional[str]) -> Optional[List[str]]:
|
||||||
if raw is None:
|
if raw is None:
|
||||||
return None
|
return None
|
||||||
return raw.split(",")
|
return re.split(r"[;,]", raw)
|
||||||
|
|
||||||
|
|
||||||
@app.command(no_args_is_help=True)
|
@app.command(no_args_is_help=True)
|
||||||
@@ -261,7 +262,7 @@ def convert(
|
|||||||
case _:
|
case _:
|
||||||
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
|
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
|
||||||
|
|
||||||
ocr_lang_list = _comma_split(ocr_lang)
|
ocr_lang_list = _split_list(ocr_lang)
|
||||||
if ocr_lang_list is not None:
|
if ocr_lang_list is not None:
|
||||||
ocr_options.lang = ocr_lang_list
|
ocr_options.lang = ocr_lang_list
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user