fix deadlock in pypdfium2 backend

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-03-02 09:58:06 -05:00
parent 762a511d0a
commit 346a49c283

View File

@ -42,12 +42,13 @@ class PyPdfiumPageBackend(PdfPageBackend):
def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
AREA_THRESHOLD = 0 # 32 * 32
page_size = self.get_size()
with pypdfium2_lock:
for obj in self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE]):
pos = obj.get_pos()
cropbox = BoundingBox.from_tuple(
pos, origin=CoordOrigin.BOTTOMLEFT
).to_top_left_origin(page_height=self.get_size().height)
).to_top_left_origin(page_height=page_size.height)
if cropbox.area() > AREA_THRESHOLD:
cropbox = cropbox.scaled(scale=scale)
@ -62,6 +63,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
if bbox.coord_origin != CoordOrigin.BOTTOMLEFT:
bbox = bbox.to_bottom_left_origin(self.get_size().height)
with pypdfium2_lock:
text_piece = self.text_page.get_text_bounded(*bbox.as_tuple())
return text_piece
@ -76,6 +78,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
page_size = self.get_size()
with pypdfium2_lock:
for i in range(self.text_page.count_rects()):
rect = self.text_page.get_rect(i)
text_piece = self.text_page.get_text_bounded(*rect)