fix: Runtime error when the Pandas Series does not always contain string values

Signed-off-by: fan <fansluck@qq.com>
This commit is contained in:
fan 2025-02-20 18:40:31 +08:00
parent dfcc30dddb
commit 9b4328c817

View File

@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
# _log.info("df: ", df.head())
# Filter rows that contain actual text (ignore header or empty rows)
df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")]
df_filtered = df[
df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
]
return df_filtered