fix: PermissionError when using tesseract_ocr_cli_model (#496)

Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Gaspard Petit 2024-12-03 04:22:03 -05:00 committed by GitHub
parent 6ca85993f4
commit 32e9b4a2cf

View File

@@ -1,5 +1,6 @@
import io import io
import logging import logging
import os
import tempfile import tempfile
from subprocess import DEVNULL, PIPE, Popen from subprocess import DEVNULL, PIPE, Popen
from typing import Iterable, Optional, Tuple from typing import Iterable, Optional, Tuple
@@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
high_res_image = page._backend.get_page_image( high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect scale=self.scale, cropbox=ocr_rect
) )
try:
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
suffix=".png", mode="w" suffix=".png", mode="w+b", delete=False
) as image_file: ) as image_file:
fname = image_file.name fname = image_file.name
high_res_image.save(fname) high_res_image.save(image_file)
df = self._run_tesseract(fname) df = self._run_tesseract(fname)
finally:
if os.path.exists(fname):
os.remove(fname)
# _log.info(df) # _log.info(df)