mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
fix: PermissionError when using tesseract_ocr_cli_model (#496)
Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
6ca85993f4
commit
32e9b4a2cf
@@ -1,5 +1,6 @@
|
|||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from subprocess import DEVNULL, PIPE, Popen
|
from subprocess import DEVNULL, PIPE, Popen
|
||||||
from typing import Iterable, Optional, Tuple
|
from typing import Iterable, Optional, Tuple
|
||||||
@@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
high_res_image = page._backend.get_page_image(
|
high_res_image = page._backend.get_page_image(
|
||||||
scale=self.scale, cropbox=ocr_rect
|
scale=self.scale, cropbox=ocr_rect
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
suffix=".png", mode="w"
|
suffix=".png", mode="w+b", delete=False
|
||||||
) as image_file:
|
) as image_file:
|
||||||
fname = image_file.name
|
fname = image_file.name
|
||||||
high_res_image.save(fname)
|
high_res_image.save(image_file)
|
||||||
|
|
||||||
df = self._run_tesseract(fname)
|
df = self._run_tesseract(fname)
|
||||||
|
finally:
|
||||||
|
if os.path.exists(fname):
|
||||||
|
os.remove(fname)
|
||||||
|
|
||||||
# _log.info(df)
|
# _log.info(df)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user