fix: Determine correct page size in DoclingParseV4Backend

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-03-19 09:38:55 +01:00
parent 54a78c307d
commit 56d3a590f2

View File

@ -112,6 +112,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
padbox.r = page_size.width - padbox.r
padbox.t = page_size.height - padbox.t
with pypdfium2_lock:
image = (
self._ppage.render(
scale=scale * 1.5,
@ -119,16 +120,22 @@ class DoclingParseV4PageBackend(PdfPageBackend):
crop=padbox.as_tuple(),
)
.to_pil()
.resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
.resize(
size=(round(cropbox.width * scale), round(cropbox.height * scale))
)
) # We resize the image from 1.5x the given scale to make it sharper.
return image
def get_size(self) -> Size:
return Size(
width=self._dpage.dimension.width,
height=self._dpage.dimension.height,
)
with pypdfium2_lock:
return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
# TODO: Take width and height from docling-parse.
# return Size(
# width=self._dpage.dimension.width,
# height=self._dpage.dimension.height,
# )
def unload(self):
self._ppage = None