diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index c7e68681..a00c8970 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -7,7 +7,6 @@ from bs4 import BeautifulSoup from docling_core.types.doc import ( DescriptionItem, DoclingDocument, - PictureData, TableCell, TableData, ) @@ -406,9 +405,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): contains_captions = element.find(["figcaption"]) if contains_captions is None: - doc.add_picture( - data=PictureData(), parent=self.parents[self.level], caption=None - ) + doc.add_picture(parent=self.parents[self.level], caption=None) else: texts = [] @@ -419,13 +416,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): label=DocItemLabel.CAPTION, text=("".join(texts)).strip() ) doc.add_picture( - data=PictureData(), parent=self.parents[self.level], caption=fig_caption, ) def handle_image(self, element, idx, doc): """Handles image tags (img).""" - doc.add_picture( - data=PictureData(), parent=self.parents[self.level], caption=None - ) + doc.add_picture(parent=self.parents[self.level], caption=None) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 876a10e1..fba3e31d 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -9,7 +9,6 @@ from docling_core.types.doc import ( DoclingDocument, DocumentOrigin, GroupLabel, - PictureData, ProvenanceItem, TableCell, TableData, @@ -243,9 +242,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB def handle_pictures(self, shape, parent_slide, slide_ind, doc): # shape has picture prov = self.generate_prov(shape, slide_ind, "") - doc.add_picture( - data=PictureData(), parent=parent_slide, caption=None, prov=prov - ) + doc.add_picture(parent=parent_slide, caption=None, prov=prov) return def handle_tables(self, shape, parent_slide, slide_ind, doc): diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 54136fdd..182e31a9 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -9,7 +9,6 @@ from docling_core.types.doc import ( DocItemLabel, DoclingDocument, GroupLabel, - PictureData, TableCell, TableData, ) @@ -492,7 +491,5 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): return def handle_pictures(self, element, docx_obj, doc): - doc.add_picture( - data=PictureData(), parent=self.parents[self.level], caption=None - ) + doc.add_picture(parent=self.parents[self.level], caption=None) return diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 3a893fa1..e5d3de1a 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -3,7 +3,7 @@ from io import BytesIO from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union from docling_core.types.doc import BoundingBox, Size -from docling_core.types.doc.document import PictureData, TableCell +from docling_core.types.doc.document import PictureDataType, TableCell from docling_core.types.doc.labels import DocItemLabel from PIL.Image import Image from pydantic import BaseModel, ConfigDict @@ -131,7 +131,7 @@ class TextElement(BasePageElement): class FigureElement(BasePageElement): - data: Optional[PictureData] = None + annotations: List[PictureDataType] = [] provenance: Optional[str] = None predicted_class: Optional[str] = None confidence: Optional[float] = None diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py index d7d2ad4f..1efa0c62 100644 --- a/docs/examples/develop_picture_enrichment.py +++ b/docs/examples/develop_picture_enrichment.py @@ -36,10 +36,10 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel): # uncomment this to interactively visualize the image # element.image.pil_image.show() - element.data.classification = PictureClassificationData( + element.annotations.append(PictureClassificationData( provenance="example_classifier-0.0.1", predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)] - ) + )) yield element @@ -83,7 +83,7 @@ def main(): for element, _level in result.document.iterate_items(): if isinstance(element, PictureItem): print( - f"The model populated the `data` portion of picture {element.self_ref}:\n{element.data}" + f"The model populated the `data` portion of picture {element.self_ref}:\n{element.annotations}" ) diff --git a/poetry.lock b/poetry.lock index 1b3a89fa..b0c8b8f6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -898,7 +898,7 @@ files = [] develop = false [package.dependencies] -docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"} +docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"} docutils = "!=0.21" matplotlib = "^3.7.1" networkx = "^3.1" @@ -922,8 +922,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"] [package.source] type = "git" url = "https://github.com/DS4SD/deepsearch-glm.git" -reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61" -resolved_reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61" +reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e" +resolved_reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e" [[package]] name = "defusedxml" @@ -982,8 +982,8 @@ tabulate = "^0.9.0" [package.source] type = "git" url = "https://github.com/DS4SD/docling-core.git" -reference = "33aa21408400c9c475db0f8c6be681b888388284" -resolved_reference = "33aa21408400c9c475db0f8c6be681b888388284" +reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2" +resolved_reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2" [[package]] name = "docling-ibm-models" @@ -7496,4 +7496,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "70620592368cfa1a6a8a7e32e1f98f5f9f253f0d99f7a8bdfb6c46a0363b2408" +content-hash = "3994b9c2200bb9827c76d84128fd7bbe1c1cc6f8e6cf1e34f9923c5511bc324a" diff --git a/pyproject.toml b/pyproject.toml index 2b566688..eae33d17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,9 @@ torchvision = [ ###################### python = "^3.10" pydantic = "^2.0.0" -docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"} +docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"} docling-ibm-models = "^2.0.1" -deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"} +deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "f219bbfb8065e787b481d6b12ca22db8e31e865e"} filetype = "^1.2.0" pypdfium2 = "^4.30.0" pydantic-settings = "^2.3.0"