mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fix: ParserError EOF inside string (#470)
Signed-off-by: guglie <gdguglie@gmail.com>
This commit is contained in:
parent
cc46c938b6
commit
a52305990b
@@ -1,3 +1,4 @@
|
|||||||
|
import csv
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
@@ -95,7 +96,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
# _log.info(decoded_data)
|
# _log.info(decoded_data)
|
||||||
|
|
||||||
# Read the TSV file generated by Tesseract
|
# Read the TSV file generated by Tesseract
|
||||||
df = pd.read_csv(io.StringIO(decoded_data), sep="\t")
|
df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
|
||||||
|
|
||||||
# Display the dataframe (optional)
|
# Display the dataframe (optional)
|
||||||
# _log.info("df: ", df.head())
|
# _log.info("df: ", df.head())
|
||||||
|
Loading…
Reference in New Issue
Block a user