Apply changes to the picture data annotations

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-16 13:24:21 +02:00
parent dd2982cce1
commit d5f161d0f5
7 changed files with 17 additions and 29 deletions

View File

@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
from docling_core.types.doc import ( from docling_core.types.doc import (
DescriptionItem, DescriptionItem,
DoclingDocument, DoclingDocument,
PictureData,
TableCell, TableCell,
TableData, TableData,
) )
@ -406,9 +405,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
contains_captions = element.find(["figcaption"]) contains_captions = element.find(["figcaption"])
if contains_captions is None: if contains_captions is None:
doc.add_picture( doc.add_picture(parent=self.parents[self.level], caption=None)
data=PictureData(), parent=self.parents[self.level], caption=None
)
else: else:
texts = [] texts = []
@ -419,13 +416,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
label=DocItemLabel.CAPTION, text=("".join(texts)).strip() label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
) )
doc.add_picture( doc.add_picture(
data=PictureData(),
parent=self.parents[self.level], parent=self.parents[self.level],
caption=fig_caption, caption=fig_caption,
) )
def handle_image(self, element, idx, doc): def handle_image(self, element, idx, doc):
"""Handles image tags (img).""" """Handles image tags (img)."""
doc.add_picture( doc.add_picture(parent=self.parents[self.level], caption=None)
data=PictureData(), parent=self.parents[self.level], caption=None
)

View File

@ -9,7 +9,6 @@ from docling_core.types.doc import (
DoclingDocument, DoclingDocument,
DocumentOrigin, DocumentOrigin,
GroupLabel, GroupLabel,
PictureData,
ProvenanceItem, ProvenanceItem,
TableCell, TableCell,
TableData, TableData,
@ -243,9 +242,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
def handle_pictures(self, shape, parent_slide, slide_ind, doc): def handle_pictures(self, shape, parent_slide, slide_ind, doc):
# shape has picture # shape has picture
prov = self.generate_prov(shape, slide_ind, "") prov = self.generate_prov(shape, slide_ind, "")
doc.add_picture( doc.add_picture(parent=parent_slide, caption=None, prov=prov)
data=PictureData(), parent=parent_slide, caption=None, prov=prov
)
return return
def handle_tables(self, shape, parent_slide, slide_ind, doc): def handle_tables(self, shape, parent_slide, slide_ind, doc):

View File

@ -9,7 +9,6 @@ from docling_core.types.doc import (
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,
GroupLabel, GroupLabel,
PictureData,
TableCell, TableCell,
TableData, TableData,
) )
@ -492,7 +491,5 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
return return
def handle_pictures(self, element, docx_obj, doc): def handle_pictures(self, element, docx_obj, doc):
doc.add_picture( doc.add_picture(parent=self.parents[self.level], caption=None)
data=PictureData(), parent=self.parents[self.level], caption=None
)
return return

View File

@ -3,7 +3,7 @@ from io import BytesIO
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
from docling_core.types.doc import BoundingBox, Size from docling_core.types.doc import BoundingBox, Size
from docling_core.types.doc.document import PictureData, TableCell from docling_core.types.doc.document import PictureDataType, TableCell
from docling_core.types.doc.labels import DocItemLabel from docling_core.types.doc.labels import DocItemLabel
from PIL.Image import Image from PIL.Image import Image
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
@ -131,7 +131,7 @@ class TextElement(BasePageElement):
class FigureElement(BasePageElement): class FigureElement(BasePageElement):
data: Optional[PictureData] = None annotations: List[PictureDataType] = []
provenance: Optional[str] = None provenance: Optional[str] = None
predicted_class: Optional[str] = None predicted_class: Optional[str] = None
confidence: Optional[float] = None confidence: Optional[float] = None

View File

@ -36,10 +36,10 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
# uncomment this to interactively visualize the image # uncomment this to interactively visualize the image
# element.image.pil_image.show() # element.image.pil_image.show()
element.data.classification = PictureClassificationData( element.annotations.append(PictureClassificationData(
provenance="example_classifier-0.0.1", provenance="example_classifier-0.0.1",
predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)] predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
) ))
yield element yield element
@ -83,7 +83,7 @@ def main():
for element, _level in result.document.iterate_items(): for element, _level in result.document.iterate_items():
if isinstance(element, PictureItem): if isinstance(element, PictureItem):
print( print(
f"The model populated the `data` portion of picture {element.self_ref}:\n{element.data}" f"The model populated the `data` portion of picture {element.self_ref}:\n{element.annotations}"
) )

12
poetry.lock generated
View File

@ -898,7 +898,7 @@ files = []
develop = false develop = false
[package.dependencies] [package.dependencies]
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"} docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
docutils = "!=0.21" docutils = "!=0.21"
matplotlib = "^3.7.1" matplotlib = "^3.7.1"
networkx = "^3.1" networkx = "^3.1"
@ -922,8 +922,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
[package.source] [package.source]
type = "git" type = "git"
url = "https://github.com/DS4SD/deepsearch-glm.git" url = "https://github.com/DS4SD/deepsearch-glm.git"
reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61" reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
resolved_reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61" resolved_reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
[[package]] [[package]]
name = "defusedxml" name = "defusedxml"
@ -982,8 +982,8 @@ tabulate = "^0.9.0"
[package.source] [package.source]
type = "git" type = "git"
url = "https://github.com/DS4SD/docling-core.git" url = "https://github.com/DS4SD/docling-core.git"
reference = "33aa21408400c9c475db0f8c6be681b888388284" reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
resolved_reference = "33aa21408400c9c475db0f8c6be681b888388284" resolved_reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@ -7496,4 +7496,4 @@ tesserocr = ["tesserocr"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "70620592368cfa1a6a8a7e32e1f98f5f9f253f0d99f7a8bdfb6c46a0363b2408" content-hash = "3994b9c2200bb9827c76d84128fd7bbe1c1cc6f8e6cf1e34f9923c5511bc324a"

View File

@ -37,9 +37,9 @@ torchvision = [
###################### ######################
python = "^3.10" python = "^3.10"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"} docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
docling-ibm-models = "^2.0.1" docling-ibm-models = "^2.0.1"
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"} deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "f219bbfb8065e787b481d6b12ca22db8e31e865e"}
filetype = "^1.2.0" filetype = "^1.2.0"
pypdfium2 = "^4.30.0" pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0" pydantic-settings = "^2.3.0"