fix: PermissionError when using tesseract_ocr_cli_model (#496)

Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Gaspard Petit 2024-12-03 04:22:03 -05:00 committed by GitHub
parent 6ca85993f4
commit 32e9b4a2cf

View File

@@ -1,5 +1,6 @@
import io
import logging
import os
import tempfile
from subprocess import DEVNULL, PIPE, Popen
from typing import Iterable, Optional, Tuple
@@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
try:
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w+b", delete=False
) as image_file:
fname = image_file.name
high_res_image.save(image_file)
df = self._run_tesseract(fname)
finally:
if os.path.exists(fname):
os.remove(fname)
# _log.info(df)