fix: Determine correct page size in DoclingParseV4Backend

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-03-19 09:38:55 +01:00
parent 54a78c307d
commit 56d3a590f2

View File

@ -112,23 +112,30 @@ class DoclingParseV4PageBackend(PdfPageBackend):
padbox.r = page_size.width - padbox.r padbox.r = page_size.width - padbox.r
padbox.t = page_size.height - padbox.t padbox.t = page_size.height - padbox.t
image = ( with pypdfium2_lock:
self._ppage.render( image = (
scale=scale * 1.5, self._ppage.render(
rotation=0, # no additional rotation scale=scale * 1.5,
crop=padbox.as_tuple(), rotation=0, # no additional rotation
) crop=padbox.as_tuple(),
.to_pil() )
.resize(size=(round(cropbox.width * scale), round(cropbox.height * scale))) .to_pil()
) # We resize the image from 1.5x the given scale to make it sharper. .resize(
size=(round(cropbox.width * scale), round(cropbox.height * scale))
)
) # We resize the image from 1.5x the given scale to make it sharper.
return image return image
def get_size(self) -> Size: def get_size(self) -> Size:
return Size( with pypdfium2_lock:
width=self._dpage.dimension.width, return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
height=self._dpage.dimension.height,
) # TODO: Take width and height from docling-parse.
# return Size(
# width=self._dpage.dimension.width,
# height=self._dpage.dimension.height,
# )
def unload(self): def unload(self):
self._ppage = None self._ppage = None