feat: allow computing page images on-demand with scale and cache them (#36)

* feat: allow computing page images on-demand and cache them

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* feat: expose scale for export of page images and document elements

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix comment

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-12-15 16:18:22 +00:00 · 2024-08-20 13:27:19 +02:00
parent c253dd743a
commit 78347bf679
9 changed files with 104 additions and 77 deletions
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -1,10 +1,12 @@
 import copy
+import warnings
 from enum import Enum, auto
 from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Annotated, Any, Dict, List, Optional, Tuple, Union

 from PIL.Image import Image
-from pydantic import BaseModel, ConfigDict, model_validator
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from typing_extensions import Self

 from docling.backend.abstract_backend import PdfPageBackend

@@ -234,14 +236,30 @@ class Page(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    page_no: int
-    page_hash: str = None
-    size: PageSize = None
-    image: Image = None
+    page_hash: Optional[str] = None
+    size: Optional[PageSize] = None
    cells: List[Cell] = None
    predictions: PagePredictions = PagePredictions()
-    assembled: AssembledUnit = None
+    assembled: Optional[AssembledUnit] = None

-    _backend: PdfPageBackend = None  # Internal PDF backend
+    _backend: Optional[PdfPageBackend] = (
+        None  # Internal PDF backend. By default it is cleared during assembling.
+    )
+    _default_image_scale: float = 1.0  # Default image scale for external usage.
+    _image_cache: Dict[float, Image] = (
+        {}
+    )  # Cache of images in different scales. By default it is cleared during assembling.
+
+    def get_image(self, scale: float = 1.0) -> Optional[Image]:
+        if self._backend is None:  # nothing to render with: serve cached images only
+            return self._image_cache.get(scale)
+        if scale not in self._image_cache:  # render once per scale, then reuse
+            self._image_cache[scale] = self._backend.get_page_image(scale=scale)
+        return self._image_cache[scale]
+
+    @property
+    def image(self) -> Optional[Image]:  # page image at the default scale (may be None)
+        return self.get_image(scale=self._default_image_scale)


 class DocumentStream(BaseModel):
@@ -268,6 +286,19 @@ class PipelineOptions(BaseModel):


 class AssembleOptions(BaseModel):
-    keep_page_images: bool = (
-        False  # False: page images are removed in the assemble step
-    )
+    keep_page_images: Annotated[
+        bool,
+        Field(
+            deprecated="`keep_page_images` is deprecated, set the value of `images_scale` instead"
+        ),
+    ] = False  # False: page images are removed in the assemble step
+    images_scale: Optional[float] = None  # if set, the scale for generated images
+
+    @model_validator(mode="after")
+    def set_page_images_from_deprecated(self) -> Self:
+        with warnings.catch_warnings():  # reading the deprecated field must not warn here
+            warnings.simplefilter("ignore", DeprecationWarning)
+            # Legacy flag set without an explicit scale: fall back to scale 1.0.
+            if self.keep_page_images and self.images_scale is None:
+                self.images_scale = 1.0
+        return self
--- a/docling/datamodel/document.py
+++ b/docling/datamodel/document.py
@@ -1,7 +1,7 @@
 import logging
 from io import BytesIO
 from pathlib import Path, PurePath
-from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union
+from typing import ClassVar, Dict, Iterable, List, Optional, Tuple, Type, Union

 from docling_core.types import BaseCell, BaseText
 from docling_core.types import BoundingBox as DsBoundingBox
@@ -21,6 +21,7 @@ from docling.datamodel.base_models import (
    DocumentStream,
    FigureElement,
    Page,
+    PageElement,
    TableElement,
    TextElement,
 )
@@ -302,6 +303,20 @@ class ConvertedDocument(BaseModel):
        else:
            return ""

+    def render_element_images(
+        self, element_types: Tuple[Type[PageElement], ...] = (FigureElement,)
+    ):
+        """Yield (element, cropped page image) for assembled elements of the given types."""
+        for element in self.assembled.elements:
+            if isinstance(element, element_types):
+                page = self.pages[element.page_no]
+                scale = page._default_image_scale
+                # Scale the bbox like the page image and convert it to a top-left origin.
+                crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
+                    page_height=page.size.height * scale
+                )
+                yield element, page.image.crop(crop_bbox.as_tuple())
+

 class DocumentConversionInput(BaseModel):