From dadff50589601fb54e4f6e7271506a2c37126b68 Mon Sep 17 00:00:00 2001
From: Nikos Livathinos
Date: Tue, 18 Feb 2025 10:58:11 +0100
Subject: [PATCH] fix: Disable the TOKENIZERS_PARALLELISM in test_e2e_ocr_conversion.py to avoid warning messages from HF

Signed-off-by: Nikos Livathinos
---
 tests/test_e2e_ocr_conversion.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py
index 62e4c855..0ef00a42 100644
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@@ -1,3 +1,4 @@
+import os
 import sys
 from pathlib import Path
 from typing import List
@@ -53,6 +54,12 @@ def get_converter(ocr_options: OcrOptions):
 
 
 def test_e2e_conversions():
+    r"""
+    End-to-end conversions with OCR
+    """
+    # Disable parallelisation for HF tokenizers to avoid warning messages
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
     pdf_paths = get_pdf_paths()
 
     engines: List[OcrOptions] = [