fix generation of images and adapt examples

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-07-27 04:24:45 +00:00 · 2024-10-15 17:43:47 +02:00 · 2024-10-15 17:43:47 +02:00 · cd8e3dce76
commit cd8e3dce76
parent 75feef259d
5 changed files with 41 additions and 41 deletions
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@ -437,25 +437,6 @@ class ConversionResult(BaseModel):
        return ds_doc
    def render_element_images(
        self, element_types: Tuple[Type[PageElement]] = (FigureElement,)
    ):
        """Yield ``(element, cropped_image)`` pairs for selected elements.

        Walks ``self.assembled.elements`` and, for every element that is an
        instance of one of ``element_types``, crops the element's cluster
        bounding box out of the image of the page it belongs to.

        Args:
            element_types: Classes to select; passed straight to
                ``isinstance``. Defaults to ``(FigureElement,)``.

        Yields:
            Tuples of the matching element and the result of calling
            ``crop`` on the page image. Elements whose page has no image
            (``page.image is None``) are silently skipped.

        Raises:
            AssertionError: If the page of a matching element has no size.
        """
        for item in self.assembled.elements:
            if not isinstance(item, element_types):
                continue

            # element.page_no is used directly as an index into self.pages.
            source_page = self.pages[item.page_no]
            assert source_page.size is not None

            # The bbox is scaled by the same factor as the page height before
            # converting to a top-left origin, so both share one scale.
            factor = source_page._default_image_scale
            scaled_box = item.cluster.bbox.scaled(scale=factor)
            region = scaled_box.to_top_left_origin(
                page_height=source_page.size.height * factor
            )

            snapshot = source_page.image
            if snapshot is None:
                continue

            yield item, snapshot.crop(region.as_tuple())
 class _DocumentConversionInput(BaseModel):
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@ -152,8 +152,8 @@ class StandardPdfPipeline(PaginatedPipeline):
        if self.pipeline_options.generate_page_images:
            for page in conv_res.pages:
                assert page.image is not None
-                page_ix = page.page_no - 1
+                page_no = page.page_no + 1
-                conv_res.document.pages[page_ix].image = ImageRef.from_pil(
+                conv_res.document.pages[page_no].image = ImageRef.from_pil(
                    page.image, dpi=int(72 * self.pipeline_options.images_scale)
                )
@ -174,17 +174,17 @@ class StandardPdfPipeline(PaginatedPipeline):
                    and self.pipeline_options.generate_table_images
                ):
                    page_ix = element.prov[0].page_no - 1
                    page = conv_res.pages[page_ix]
                    assert page.size is not None
                    assert page.image is not None
                    crop_bbox = (
                        element.prov[0]
                        .bbox.scaled(scale=scale)
-                        .to_top_left_origin(
+                        .to_top_left_origin(page_height=page.size.height * scale)
                            page_height=conv_res.pages[page_ix].size.height * scale
                        )
                    )
-                    cropped_im = conv_res.pages[page_ix].image.crop(
+                    cropped_im = page.image.crop(crop_bbox.as_tuple())
                        crop_bbox.as_tuple()
                    )
                    element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale))
        return conv_res
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@ -34,7 +34,7 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
            assert isinstance(element, PictureItem)
            # uncomment this to interactively visualize the image
-            element.image.pil_image.show()
+            # element.image.pil_image.show()
            element.data.classification = PictureClassificationData(
                provenance="example_classifier-0.0.1",
--- a/docs/examples/export_figures.py
+++ b/docs/examples/export_figures.py
@ -1,7 +1,8 @@
 import logging
 import time
 from pathlib import Path
-import time
+from docling_core.types.doc.document import PictureItem, TableItem
 from docling.datamodel.base_models import FigureElement, InputFormat, Table
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@ -20,10 +21,15 @@ def main():
    # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
    # will destroy them for cleaning up memory.
-    # This is done by setting AssembleOptions.images_scale, which also defines the scale of images.
+    # This is done by setting PdfPipelineOptions.images_scale, which also defines the scale of images.
    # scale=1 corresponds to a standard 72 DPI image
    # The PdfPipelineOptions.generate_* are the selectors for the document elements which will be enriched
    # with the image field
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
    pipeline_options.generate_page_images = True
    pipeline_options.generate_table_images = True
    pipeline_options.generate_picture_images = True
    doc_converter = DocumentConverter(
        format_options={
@ -38,20 +44,32 @@ def main():
    output_dir.mkdir(parents=True, exist_ok=True)
    doc_filename = conv_res.input.file.stem
-    # Export page images
+    # Save page images
-    for page in conv_res.pages:
+    for page_no, page in conv_res.document.pages.items():
-        page_no = page.page_no + 1
+        page_no = page.page_no
        page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
        with page_image_filename.open("wb") as fp:
-            page.image.save(fp, format="PNG")
+            page.image.pil_image.save(fp, format="PNG")
-    # Export figures and tables
+    # Save images of figures and tables
-    for element, image in conv_res.render_element_images(
+    table_counter = 0
-        element_types=(FigureElement, Table)
+    picture_counter = 0
-    ):
+    for element, _level in conv_res.document.iterate_items():
-        element_image_filename = output_dir / f"{doc_filename}-element-{element.id}.png"
+        if isinstance(element, TableItem):
-        with element_image_filename.open("wb") as fp:
+            table_counter += 1
-            image.save(fp, "PNG")
+            element_image_filename = (
                output_dir / f"{doc_filename}-table-{table_counter}.png"
            )
            with element_image_filename.open("wb") as fp:
                element.image.pil_image.save(fp, "PNG")
        if isinstance(element, PictureItem):
            picture_counter += 1
            element_image_filename = (
                output_dir / f"{doc_filename}-picture-{picture_counter}.png"
            )
            with element_image_filename.open("wb") as fp:
                element.image.pil_image.save(fp, "PNG")
    end_time = time.time() - start_time
--- a/docs/examples/export_multimodal.py
+++ b/docs/examples/export_multimodal.py
@ -28,6 +28,7 @@ def main():
    # scale=1 corresponds to a standard 72 DPI image
    pipeline_options = PdfPipelineOptions()
    pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
    pipeline_options.generate_page_images = True
    doc_converter = DocumentConverter(
        format_options={