mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Merge from main
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
commit
678eed2057
@ -187,7 +187,17 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
|
|||||||
|
|
||||||
def unload(self):
|
def unload(self):
|
||||||
super().unload()
|
super().unload()
|
||||||
self.dp_doc.unload()
|
# Unload docling-parse document first
|
||||||
with pypdfium2_lock:
|
if self.dp_doc is not None:
|
||||||
self._pdoc.close()
|
self.dp_doc.unload()
|
||||||
self._pdoc = None
|
self.dp_doc = None
|
||||||
|
|
||||||
|
# Then close pypdfium2 document with proper locking
|
||||||
|
if self._pdoc is not None:
|
||||||
|
with pypdfium2_lock:
|
||||||
|
try:
|
||||||
|
self._pdoc.close()
|
||||||
|
except Exception:
|
||||||
|
# Ignore cleanup errors
|
||||||
|
pass
|
||||||
|
self._pdoc = None
|
||||||
|
@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
|
|
||||||
local_reader = self.reader
|
local_reader = self.reader
|
||||||
self.osd_reader.SetImage(high_res_image)
|
self.osd_reader.SetImage(high_res_image)
|
||||||
|
|
||||||
|
doc_orientation = 0
|
||||||
osd = self.osd_reader.DetectOrientationScript()
|
osd = self.osd_reader.DetectOrientationScript()
|
||||||
|
|
||||||
# No text, or Orientation and Script detection failure
|
# No text, or Orientation and Script detection failure
|
||||||
if osd is None:
|
if osd is None:
|
||||||
_log.error(
|
_log.error(
|
||||||
@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
# to OCR in the hope OCR will succeed while OSD failed
|
# to OCR in the hope OCR will succeed while OSD failed
|
||||||
if self._is_auto:
|
if self._is_auto:
|
||||||
continue
|
continue
|
||||||
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
|
else:
|
||||||
if doc_orientation != 0:
|
doc_orientation = parse_tesseract_orientation(
|
||||||
high_res_image = high_res_image.rotate(
|
osd["orient_deg"]
|
||||||
-doc_orientation, expand=True
|
|
||||||
)
|
)
|
||||||
|
if doc_orientation != 0:
|
||||||
|
high_res_image = high_res_image.rotate(
|
||||||
|
-doc_orientation, expand=True
|
||||||
|
)
|
||||||
if self._is_auto:
|
if self._is_auto:
|
||||||
script = osd["script_name"]
|
script = osd["script_name"]
|
||||||
script = map_tesseract_script(script)
|
script = map_tesseract_script(script)
|
||||||
|
@ -46,6 +46,12 @@ def test_text_cell_counts():
|
|||||||
)
|
)
|
||||||
last_cell_count = len(cells)
|
last_cell_count = len(cells)
|
||||||
|
|
||||||
|
# Clean up page backend after each iteration
|
||||||
|
page_backend.unload()
|
||||||
|
|
||||||
|
# Explicitly clean up document backend to prevent race conditions in CI
|
||||||
|
doc_backend.unload()
|
||||||
|
|
||||||
|
|
||||||
def test_get_text_from_rect(test_doc_path):
|
def test_get_text_from_rect(test_doc_path):
|
||||||
doc_backend = _get_backend(test_doc_path)
|
doc_backend = _get_backend(test_doc_path)
|
||||||
@ -59,6 +65,10 @@ def test_get_text_from_rect(test_doc_path):
|
|||||||
|
|
||||||
assert textpiece.strip() == ref
|
assert textpiece.strip() == ref
|
||||||
|
|
||||||
|
# Explicitly clean up resources
|
||||||
|
page_backend.unload()
|
||||||
|
doc_backend.unload()
|
||||||
|
|
||||||
|
|
||||||
def test_crop_page_image(test_doc_path):
|
def test_crop_page_image(test_doc_path):
|
||||||
doc_backend = _get_backend(test_doc_path)
|
doc_backend = _get_backend(test_doc_path)
|
||||||
@ -70,7 +80,14 @@ def test_crop_page_image(test_doc_path):
|
|||||||
)
|
)
|
||||||
# im.show()
|
# im.show()
|
||||||
|
|
||||||
|
# Explicitly clean up resources
|
||||||
|
page_backend.unload()
|
||||||
|
doc_backend.unload()
|
||||||
|
|
||||||
|
|
||||||
def test_num_pages(test_doc_path):
|
def test_num_pages(test_doc_path):
|
||||||
doc_backend = _get_backend(test_doc_path)
|
doc_backend = _get_backend(test_doc_path)
|
||||||
doc_backend.page_count() == 9
|
doc_backend.page_count() == 9
|
||||||
|
|
||||||
|
# Explicitly clean up resources to prevent race conditions in CI
|
||||||
|
doc_backend.unload()
|
||||||
|
Loading…
Reference in New Issue
Block a user