fix: ParserError EOF inside string (#470)

Signed-off-by: guglie <gdguglie@gmail.com>
This commit is contained in:
guglie 2024-11-29 17:25:06 +01:00 committed by GitHub
parent cc46c938b6
commit a52305990b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,3 +1,4 @@
+import csv
import io
import logging
import tempfile
@@ -95,7 +96,7 @@ class TesseractOcrCliModel(BaseOcrModel):
# _log.info(decoded_data)
# Read the TSV file generated by Tesseract
-df = pd.read_csv(io.StringIO(decoded_data), sep="\t")
+df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
# Display the dataframe (optional)
# _log.info("df: ", df.head())