mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 20:44:16 +00:00
Signed-off-by: guglie <gdguglie@gmail.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
a01cedbb69
commit
a7e3f713bb
@ -1,3 +1,4 @@
|
|||||||
|
import csv
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@ -96,7 +97,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
# _log.info(decoded_data)
|
# _log.info(decoded_data)
|
||||||
|
|
||||||
# Read the TSV file generated by Tesseract
|
# Read the TSV file generated by Tesseract
|
||||||
df = pd.read_csv(io.StringIO(decoded_data), sep="\t")
|
df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
|
||||||
|
|
||||||
# Display the dataframe (optional)
|
# Display the dataframe (optional)
|
||||||
# _log.info("df: ", df.head())
|
# _log.info("df: ", df.head())
|
||||||
|
Loading…
Reference in New Issue
Block a user