fix: ParserError EOF inside string (#470) (#472)

Signed-off-by: guglie <gdguglie@gmail.com>
This commit is contained in:
guglie
2024-12-03 11:21:18 +01:00
committed by GitHub
parent 5ba3807f31
commit c90c41c391

View File

@@ -1,3 +1,4 @@
import csv
import io
import logging
import os
@@ -96,7 +97,7 @@ class TesseractOcrCliModel(BaseOcrModel):
# _log.info(decoded_data)
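# Read the TSV output generated by Tesseract. Quoting is disabled
# (csv.QUOTE_NONE) so that a stray double quote in the recognized text is
# kept as a literal character instead of opening a quoted field, which
# otherwise fails with "ParserError: EOF inside string".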
df = pd.read_csv(io.StringIO(decoded_data), sep="\t")
df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
# Display the dataframe (optional)
# _log.info("df: ", df.head())