feat: allow computing page images on demand at a given scale and caching them (#36)

* feat: allow computing page images on demand and caching them

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* feat: expose scale for export of page images and document elements

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix comment

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2024-08-20 13:27:19 +02:00
committed by GitHub
parent c253dd743a
commit 78347bf679
9 changed files with 104 additions and 77 deletions

View File

@@ -30,7 +30,7 @@ class EasyOcrModel:
for page in page_batch:
# rects = page._fpage.
high_res_image = page._backend.get_page_image(scale=self.scale)
high_res_image = page.get_image(scale=self.scale)
im = numpy.array(high_res_image)
result = self.reader.readtext(im)

View File

@@ -267,7 +267,9 @@ class LayoutModel:
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
for page in page_batch:
clusters = []
for ix, pred_item in enumerate(self.layout_predictor.predict(page.image)):
for ix, pred_item in enumerate(
self.layout_predictor.predict(page.get_image(scale=1.0))
):
cluster = Cluster(
id=ix,
label=pred_item["label"],

View File

@@ -34,7 +34,9 @@ class TableStructureModel:
self.scale = 2.0 # Scale up table input images to 144 dpi
def draw_table_and_cells(self, page: Page, tbl_list: List[TableElement]):
image = page._backend.get_page_image()
image = (
page._backend.get_page_image()
) # make new image to avoid drawing on the saved ones
draw = ImageDraw.Draw(image)
for table_element in tbl_list:
@@ -94,13 +96,7 @@ class TableStructureModel:
"width": page.size.width * self.scale,
"height": page.size.height * self.scale,
}
# add image to page input.
if self.scale == 1.0:
page_input["image"] = numpy.asarray(page.image)
else: # render new page image on the fly at desired scale
page_input["image"] = numpy.asarray(
page._backend.get_page_image(scale=self.scale)
)
page_input["image"] = numpy.asarray(page.get_image(scale=self.scale))
table_clusters, table_bboxes = zip(*in_tables)