feat: expose ocr-lang in CLI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-11-19 10:57:35 +01:00
parent e6f89d520f
commit e625f5d87b
2 changed files with 18 additions and 0 deletions

View File

@ -129,6 +129,12 @@ def export_documents(
) )
def _comma_split(raw: Optional[str]) -> Optional[List[str]]:
if raw is None:
return None
return raw.split(",")
@app.command(no_args_is_help=True) @app.command(no_args_is_help=True)
def convert( def convert(
input_sources: Annotated[ input_sources: Annotated[
@ -163,6 +169,13 @@ def convert(
ocr_engine: Annotated[ ocr_engine: Annotated[
OcrEngine, typer.Option(..., help="The OCR engine to use.") OcrEngine, typer.Option(..., help="The OCR engine to use.")
] = OcrEngine.EASYOCR, ] = OcrEngine.EASYOCR,
ocr_lang: Annotated[
Optional[str],
typer.Option(
...,
help="Provide a comma-separated list of languages used by the OCR engine. Note that each OCR engine has different values for the language names.",
),
] = None,
pdf_backend: Annotated[ pdf_backend: Annotated[
PdfBackend, typer.Option(..., help="The PDF backend to use.") PdfBackend, typer.Option(..., help="The PDF backend to use.")
] = PdfBackend.DLPARSE_V1, ] = PdfBackend.DLPARSE_V1,
@ -248,6 +261,10 @@ def convert(
case _: case _:
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}") raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
ocr_lang_list = _comma_split(ocr_lang)
if ocr_lang_list is not None:
ocr_options.lang = ocr_lang_list
pipeline_options = PdfPipelineOptions( pipeline_options = PdfPipelineOptions(
do_ocr=ocr, do_ocr=ocr,
ocr_options=ocr_options, ocr_options=ocr_options,

View File

@ -22,6 +22,7 @@ class TableStructureOptions(BaseModel):
class OcrOptions(BaseModel): class OcrOptions(BaseModel):
kind: str kind: str
lang: List[str]
force_full_page_ocr: bool = False # If enabled a full page OCR is always applied force_full_page_ocr: bool = False # If enabled a full page OCR is always applied
bitmap_area_threshold: float = ( bitmap_area_threshold: float = (
0.05 # percentage of the area for a bitmap to processed with OCR 0.05 # percentage of the area for a bitmap to processed with OCR