mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Merge from main
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
commit
678eed2057
@ -187,7 +187,17 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
|
||||
|
||||
def unload(self):
    """Release the resources held by this document backend.

    Calls the parent ``unload()`` first, then unloads the docling-parse
    document (``self.dp_doc``) and finally closes the pypdfium2 document
    (``self._pdoc``) while holding ``pypdfium2_lock``. Both attributes are
    reset to ``None`` once released, so calling this method again is a no-op
    for handles that were already released.
    """
    import logging

    super().unload()

    try:
        # Unload docling-parse document first
        if self.dp_doc is not None:
            self.dp_doc.unload()
            self.dp_doc = None
    finally:
        # Then close pypdfium2 document with proper locking. This runs even
        # when the docling-parse unload above raised, so the pypdfium2
        # handle is not leaked.
        if self._pdoc is not None:
            with pypdfium2_lock:
                try:
                    self._pdoc.close()
                except Exception:
                    # Cleanup is best-effort: do not propagate the error,
                    # but leave a trace for debugging instead of hiding it.
                    logging.getLogger(__name__).debug(
                        "Ignoring error while closing pypdfium2 document",
                        exc_info=True,
                    )
            self._pdoc = None
|
||||
|
@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
|
||||
local_reader = self.reader
|
||||
self.osd_reader.SetImage(high_res_image)
|
||||
|
||||
doc_orientation = 0
|
||||
osd = self.osd_reader.DetectOrientationScript()
|
||||
|
||||
# No text, or Orientation and Script detection failure
|
||||
if osd is None:
|
||||
_log.error(
|
||||
@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
# to OCR in the hope OCR will succeed while OSD failed
|
||||
if self._is_auto:
|
||||
continue
|
||||
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
|
||||
if doc_orientation != 0:
|
||||
high_res_image = high_res_image.rotate(
|
||||
-doc_orientation, expand=True
|
||||
else:
|
||||
doc_orientation = parse_tesseract_orientation(
|
||||
osd["orient_deg"]
|
||||
)
|
||||
if doc_orientation != 0:
|
||||
high_res_image = high_res_image.rotate(
|
||||
-doc_orientation, expand=True
|
||||
)
|
||||
if self._is_auto:
|
||||
script = osd["script_name"]
|
||||
script = map_tesseract_script(script)
|
||||
|
@ -46,6 +46,12 @@ def test_text_cell_counts():
|
||||
)
|
||||
last_cell_count = len(cells)
|
||||
|
||||
# Clean up page backend after each iteration
|
||||
page_backend.unload()
|
||||
|
||||
# Explicitly clean up document backend to prevent race conditions in CI
|
||||
doc_backend.unload()
|
||||
|
||||
|
||||
def test_get_text_from_rect(test_doc_path):
|
||||
doc_backend = _get_backend(test_doc_path)
|
||||
@ -59,6 +65,10 @@ def test_get_text_from_rect(test_doc_path):
|
||||
|
||||
assert textpiece.strip() == ref
|
||||
|
||||
# Explicitly clean up resources
|
||||
page_backend.unload()
|
||||
doc_backend.unload()
|
||||
|
||||
|
||||
def test_crop_page_image(test_doc_path):
|
||||
doc_backend = _get_backend(test_doc_path)
|
||||
@ -70,7 +80,14 @@ def test_crop_page_image(test_doc_path):
|
||||
)
|
||||
# im.show()
|
||||
|
||||
# Explicitly clean up resources
|
||||
page_backend.unload()
|
||||
doc_backend.unload()
|
||||
|
||||
|
||||
def test_num_pages(test_doc_path):
    """Check that the backend reports 9 pages for the test document."""
    doc_backend = _get_backend(test_doc_path)
    try:
        # The comparison must be asserted; as a bare expression its result
        # is discarded and the test can never fail.
        assert doc_backend.page_count() == 9
    finally:
        # Explicitly clean up resources to prevent race conditions in CI,
        # also when the assertion above fails.
        doc_backend.unload()
|
||||
|
Loading…
Reference in New Issue
Block a user