mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-31 14:34:40 +00:00
fix: PermissionError when using tesseract_ocr_cli_model
Make sure that `tesseract_ocr_cli_model.py` does not open the PNG image file twice (`tempfile.NamedTemporaryFile` + `high_res_image.save`), and ensure that `_run_tesseract` is executed only once the file is no longer held open by Python. Otherwise, this results in a "PermissionError: [Errno 13] Permission denied" error on Windows. Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
This commit is contained in:
parent
8ccb3c6db6
commit
42c544996d
@@ -1,4 +1,5 @@
|
||||
import io
|
||||
import os
|
||||
import logging
|
||||
import tempfile
|
||||
from subprocess import DEVNULL, PIPE, Popen
|
||||
@@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
high_res_image = page._backend.get_page_image(
|
||||
scale=self.scale, cropbox=ocr_rect
|
||||
)
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
suffix=".png", mode="w"
|
||||
) as image_file:
|
||||
fname = image_file.name
|
||||
high_res_image.save(fname)
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
suffix=".png", mode="w+b", delete=False
|
||||
) as image_file:
|
||||
fname = image_file.name
|
||||
high_res_image.save(image_file)
|
||||
|
||||
df = self._run_tesseract(fname)
|
||||
finally:
|
||||
if os.path.exists(fname):
|
||||
os.remove(fname)
|
||||
|
||||
# _log.info(df)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user