mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
fix: PermissionError when using tesseract_ocr_cli_model
Make sure that `tesseract_ocr_cli_model.py` does not open the PNG image file twice (`tempfile.NamedTemporaryFile` + `high_res_image.save`), and ensure that `_run_tesseract` is executed only once the file is no longer held open by Python. Otherwise this results in a "PermissionError: [Errno 13] Permission denied" error on Windows. Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
This commit is contained in:
parent
d7072b4b56
commit
0d12ad1dcc
@ -1,4 +1,5 @@
|
|||||||
import io
|
import io
|
||||||
|
import os
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
from subprocess import DEVNULL, PIPE, Popen
|
from subprocess import DEVNULL, PIPE, Popen
|
||||||
@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
high_res_image = page._backend.get_page_image(
|
high_res_image = page._backend.get_page_image(
|
||||||
scale=self.scale, cropbox=ocr_rect
|
scale=self.scale, cropbox=ocr_rect
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
suffix=".png", mode="w"
|
suffix=".png", mode="w+b", delete=False
|
||||||
) as image_file:
|
) as image_file:
|
||||||
fname = image_file.name
|
fname = image_file.name
|
||||||
high_res_image.save(fname)
|
high_res_image.save(image_file)
|
||||||
|
|
||||||
df = self._run_tesseract(fname)
|
df = self._run_tesseract(fname)
|
||||||
|
finally:
|
||||||
|
if os.path.exists(fname):
|
||||||
|
os.remove(fname)
|
||||||
|
|
||||||
# _log.info(df)
|
# _log.info(df)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user