feat: allow computing page images on-demand with scale and cache them (#36)

* feat: allow computing page images on-demand and cache them Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * feat: expose scale for export of page images and document elements Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix comment Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-12-08 20:58:11 +00:00 · 2024-08-20 13:27:19 +02:00
parent c253dd743a
commit 78347bf679
9 changed files with 104 additions and 77 deletions
--- a/examples/export_figures.py
+++ b/examples/export_figures.py
@@ -15,44 +15,7 @@ from docling.document_converter import DocumentConverter

 _log = logging.getLogger(__name__)

-
-def export_page_images(
-    doc: ConvertedDocument,
-    output_dir: Path,
-):
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    doc_filename = doc.input.file.stem
-
-    for page in doc.pages:
-        page_no = page.page_no + 1
-        page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
-        with page_image_filename.open("wb") as fp:
-            page.image.save(fp, format="PNG")
-
-
-def export_element_images(
-    doc: ConvertedDocument,
-    output_dir: Path,
-    allowed_element_types: Tuple[PageElement] = (FigureElement,),
-):
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    doc_filename = doc.input.file.stem
-
-    for element_ix, element in enumerate(doc.assembled.elements):
-        if isinstance(element, allowed_element_types):
-            page_ix = element.page_no
-            crop_bbox = element.cluster.bbox.to_top_left_origin(
-                page_height=doc.pages[page_ix].size.height
-            )
-
-            cropped_im = doc.pages[page_ix].image.crop(crop_bbox.as_tuple())
-            element_image_filename = (
-                output_dir / f"{doc_filename}-element-{element_ix}.png"
-            )
-            with element_image_filename.open("wb") as fp:
-                cropped_im.save(fp, "PNG")
+IMAGE_RESOLUTION_SCALE = 2.0


 def main():
@@ -61,13 +24,16 @@ def main():
    input_doc_paths = [
        Path("./test/data/2206.01062.pdf"),
    ]
+    output_dir = Path("./scratch")

    input_files = DocumentConversionInput.from_paths(input_doc_paths)

    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
    # will destroy them for cleaning up memory.
+    # This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
+    # scale=1 correspond of a standard 72 DPI image
    assemble_options = AssembleOptions()
-    assemble_options.keep_page_images = True
+    assemble_options.images_scale = IMAGE_RESOLUTION_SCALE

    doc_converter = DocumentConverter(assemble_options=assemble_options)

@@ -75,23 +41,30 @@ def main():

    converted_docs = doc_converter.convert(input_files)

+    output_dir.mkdir(parents=True, exist_ok=True)
    for doc in converted_docs:
        if doc.status != ConversionStatus.SUCCESS:
            _log.info(f"Document {doc.input.file} failed to convert.")
            continue

-        # Export page images
-        export_page_images(doc, output_dir=Path("./scratch"))
+        doc_filename = doc.input.file.stem

-        # Export figures
-        # export_element_images(doc, output_dir=Path("./scratch"), allowed_element_types=(FigureElement,))
+        # Export page images
+        for page in doc.pages:
+            page_no = page.page_no + 1
+            page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
+            with page_image_filename.open("wb") as fp:
+                page.image.save(fp, format="PNG")

        # Export figures and tables
-        export_element_images(
-            doc,
-            output_dir=Path("./scratch"),
-            allowed_element_types=(FigureElement, TableElement),
-        )
+        for element, image in doc.render_element_images(
+            element_types=(FigureElement, TableElement)
+        ):
+            element_image_filename = (
+                output_dir / f"{doc_filename}-element-{element.id}.png"
+            )
+            with element_image_filename.open("wb") as fp:
+                image.save(fp, "PNG")

    end_time = time.time() - start_time