diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 9a50eee0..a6b2f7fb 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -1,3 +1,4 @@ +import csv import io import logging import tempfile @@ -95,7 +96,7 @@ class TesseractOcrCliModel(BaseOcrModel): # _log.info(decoded_data) # Read the TSV file generated by Tesseract - df = pd.read_csv(io.StringIO(decoded_data), sep="\t") + df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t") # Display the dataframe (optional) # _log.info("df: ", df.head())