apply changes to the picture data annotations

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-07-27 04:24:45 +00:00 · 2024-10-16 13:24:21 +02:00 · 2024-10-16 13:24:21 +02:00 · d5f161d0f5
commit d5f161d0f5
parent dd2982cce1
7 changed files with 17 additions and 29 deletions
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
 from docling_core.types.doc import (
    DescriptionItem,
    DoclingDocument,
-    PictureData,
    TableCell,
    TableData,
 )
@@ -406,9 +405,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
        contains_captions = element.find(["figcaption"])
        if contains_captions is None:
-            doc.add_picture(
+            doc.add_picture(parent=self.parents[self.level], caption=None)
-                data=PictureData(), parent=self.parents[self.level], caption=None
-            )
        else:
            texts = []
@@ -419,13 +416,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
            )
            doc.add_picture(
-                data=PictureData(),
                parent=self.parents[self.level],
                caption=fig_caption,
            )
    def handle_image(self, element, idx, doc):
        """Handles image tags (img)."""
-        doc.add_picture(
+        doc.add_picture(parent=self.parents[self.level], caption=None)
-            data=PictureData(), parent=self.parents[self.level], caption=None
-        )
--- a/docling/backend/mspowerpoint_backend.py
+++ b/docling/backend/mspowerpoint_backend.py
@@ -9,7 +9,6 @@ from docling_core.types.doc import (
    DoclingDocument,
    DocumentOrigin,
    GroupLabel,
-    PictureData,
    ProvenanceItem,
    TableCell,
    TableData,
@@ -243,9 +242,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
    def handle_pictures(self, shape, parent_slide, slide_ind, doc):
        # shape has picture
        prov = self.generate_prov(shape, slide_ind, "")
-        doc.add_picture(
+        doc.add_picture(parent=parent_slide, caption=None, prov=prov)
-            data=PictureData(), parent=parent_slide, caption=None, prov=prov
-        )
        return
    def handle_tables(self, shape, parent_slide, slide_ind, doc):
--- a/docling/backend/msword_backend.py
+++ b/docling/backend/msword_backend.py
@@ -9,7 +9,6 @@ from docling_core.types.doc import (
    DocItemLabel,
    DoclingDocument,
    GroupLabel,
-    PictureData,
    TableCell,
    TableData,
 )
@@ -492,7 +491,5 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
        return
    def handle_pictures(self, element, docx_obj, doc):
-        doc.add_picture(
+        doc.add_picture(parent=self.parents[self.level], caption=None)
-            data=PictureData(), parent=self.parents[self.level], caption=None
-        )
        return
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -3,7 +3,7 @@ from io import BytesIO
 from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
 from docling_core.types.doc import BoundingBox, Size
-from docling_core.types.doc.document import PictureData, TableCell
+from docling_core.types.doc.document import PictureDataType, TableCell
 from docling_core.types.doc.labels import DocItemLabel
 from PIL.Image import Image
 from pydantic import BaseModel, ConfigDict
@@ -131,7 +131,7 @@ class TextElement(BasePageElement):
 class FigureElement(BasePageElement):
-    data: Optional[PictureData] = None
+    annotations: List[PictureDataType] = []
    provenance: Optional[str] = None
    predicted_class: Optional[str] = None
    confidence: Optional[float] = None
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@@ -36,10 +36,10 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
            # uncomment this to interactively visualize the image
            # element.image.pil_image.show()
-            element.data.classification = PictureClassificationData(
+            element.annotations.append(PictureClassificationData(
                provenance="example_classifier-0.0.1",
                predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
-            )
+            ))
            yield element
@@ -83,7 +83,7 @@ def main():
    for element, _level in result.document.iterate_items():
        if isinstance(element, PictureItem):
            print(
-                f"The model populated the `data` portion of picture {element.self_ref}:\n{element.data}"
+                f"The model populated the `data` portion of picture {element.self_ref}:\n{element.annotations}"
            )
--- a/poetry.lock
+++ b/poetry.lock
@@ -898,7 +898,7 @@ files = []
 develop = false
 [package.dependencies]
-docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
+docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
 docutils = "!=0.21"
 matplotlib = "^3.7.1"
 networkx = "^3.1"
@@ -922,8 +922,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/DS4SD/deepsearch-glm.git"
-reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
+reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
-resolved_reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
+resolved_reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
 [[package]]
 name = "defusedxml"
@@ -982,8 +982,8 @@ tabulate = "^0.9.0"
 [package.source]
 type = "git"
 url = "https://github.com/DS4SD/docling-core.git"
-reference = "33aa21408400c9c475db0f8c6be681b888388284"
+reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
-resolved_reference = "33aa21408400c9c475db0f8c6be681b888388284"
+resolved_reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
 [[package]]
 name = "docling-ibm-models"
@@ -7496,4 +7496,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "70620592368cfa1a6a8a7e32e1f98f5f9f253f0d99f7a8bdfb6c46a0363b2408"
+content-hash = "3994b9c2200bb9827c76d84128fd7bbe1c1cc6f8e6cf1e34f9923c5511bc324a"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,9 +37,9 @@ torchvision = [
 ######################
 python = "^3.10"
 pydantic = "^2.0.0"
-docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
+docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
 docling-ibm-models = "^2.0.1"
-deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"}
+deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "f219bbfb8065e787b481d6b12ca22db8e31e865e"}
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"