fix generation of images and adapt examples

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-07-27 04:24:45 +00:00 · 2024-10-15 17:43:47 +02:00 · 2024-10-15 17:43:47 +02:00 · cd8e3dce76
commit cd8e3dce76
parent 75feef259d
5 changed files with 41 additions and 41 deletions
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@ -437,25 +437,6 @@ class ConversionResult(BaseModel):

        return ds_doc

-    def render_element_images(
-        self, element_types: Tuple[Type[PageElement]] = (FigureElement,)
-    ):
-        for element in self.assembled.elements:
-            if isinstance(element, element_types):
-                page_ix = element.page_no
-                page = self.pages[page_ix]
-
-                assert page.size is not None
-
-                scale = page._default_image_scale
-                crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
-                    page_height=page.size.height * scale
-                )
-                page_img = page.image
-                if page_img is not None:
-                    cropped_im = page_img.crop(crop_bbox.as_tuple())
-                    yield element, cropped_im
-

 class _DocumentConversionInput(BaseModel):

--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline):
        if self.pipeline_options.generate_page_images:
            for page in conv_res.pages:
                assert page.image is not None
-                page_ix = page.page_no - 1
-                conv_res.document.pages[page_ix].image = ImageRef.from_pil(
+                page_no = page.page_no + 1
+                conv_res.document.pages[page_no].image = ImageRef.from_pil(
                    page.image, dpi=int(72 * self.pipeline_options.images_scale)
                )

@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline):
                    and self.pipeline_options.generate_table_images
                ):
                    page_ix = element.prov[0].page_no - 1
+                    page = conv_res.pages[page_ix]
+                    assert page.size is not None
+                    assert page.image is not None
+
                    crop_bbox = (
                        element.prov[0]
                        .bbox.scaled(scale=scale)
-                        .to_top_left_origin(
-                            page_height=conv_res.pages[page_ix].size.height * scale
-                        )
+                        .to_top_left_origin(page_height=page.size.height * scale)
                    )

-                    cropped_im = conv_res.pages[page_ix].image.crop(
-                        crop_bbox.as_tuple()
-                    )
+                    cropped_im = page.image.crop(crop_bbox.as_tuple())
                    element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))

        return conv_res
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
            assert isinstance(element, PictureItem)

            # uncomment this to interactively visualize the image
-            element.image.pil_image.show()
+            # element.image.pil_image.show()

            element.data.classification = PictureClassificationData(
                provenance="example_classifier-0.0.1",
--- a/docs/examples/export_figures.py
+++ b/docs/examples/export_figures.py
@ -1,7 +1,8 @@
 import logging
+import time
 from pathlib import Path

-import time
+from docling_core.types.doc.document import PictureItem, TableItem

 from docling.datamodel.base_models import FigureElement, InputFormat, Table
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@ -20,10 +21,15 @@ def main():

    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
    # will destroy them for cleaning up memory.
-    # This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
+    # This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
    # scale=1 corresponds to a standard 72 DPI image
+    # The PdfPipelineOptions.generate_* options select which document elements will be enriched
+    # with the image field
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
+    pipeline_options.generate_page_images = True
+    pipeline_options.generate_table_images = True
+    pipeline_options.generate_picture_images = True

    doc_converter = DocumentConverter(
        format_options={
@ -38,20 +44,32 @@ def main():
    output_dir.mkdir(parents=True, exist_ok=True)
    doc_filename = conv_res.input.file.stem

-    # Export page images
-    for page in conv_res.pages:
-        page_no = page.page_no + 1
+    # Save page images
+    for page_no, page in conv_res.document.pages.items():
+        page_no = page.page_no
        page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
        with page_image_filename.open("wb") as fp:
-            page.image.save(fp, format="PNG")
+            page.image.pil_image.save(fp, format="PNG")

-    # Export figures and tables
-    for element, image in conv_res.render_element_images(
-        element_types=(FigureElement, Table)
-    ):
-        element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png"
+    # Save images of figures and tables
+    table_counter = 0
+    picture_counter = 0
+    for element, _level in conv_res.document.iterate_items():
+        if isinstance(element, TableItem):
+            table_counter += 1
+            element_image_filename = (
+                output_dir / f"{doc_filename}-table-{table_counter}.png"
+            )
            with element_image_filename.open("wb") as fp:
-            image.save(fp, "PNG")
+                element.image.pil_image.save(fp, "PNG")
+
+        if isinstance(element, PictureItem):
+            picture_counter += 1
+            element_image_filename = (
+                output_dir / f"{doc_filename}-picture-{picture_counter}.png"
+            )
+            with element_image_filename.open("wb") as fp:
+                element.image.pil_image.save(fp, "PNG")

    end_time = time.time() - start_time

--- a/docs/examples/export_multimodal.py
+++ b/docs/examples/export_multimodal.py
@ -28,6 +28,7 @@ def main():
    # scale=1 corresponds to a standard 72 DPI image
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
+    pipeline_options.generate_page_images = True

    doc_converter = DocumentConverter(
        format_options={