use new PictureData

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-13 16:48:16 +02:00
parent c1ed447c21
commit 7c8d7e222e
7 changed files with 29 additions and 26 deletions

View File

@ -5,10 +5,10 @@ from typing import Set, Union
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from docling_core.types.experimental import ( from docling_core.types.experimental import (
BasePictureData,
BaseTableData, BaseTableData,
DescriptionItem, DescriptionItem,
DoclingDocument, DoclingDocument,
PictureData,
TableCell, TableCell,
) )
from docling_core.types.experimental.labels import DocItemLabel, GroupLabel from docling_core.types.experimental.labels import DocItemLabel, GroupLabel
@ -400,7 +400,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
contains_captions = element.find(["figcaption"]) contains_captions = element.find(["figcaption"])
if contains_captions is None: if contains_captions is None:
doc.add_picture( doc.add_picture(
data=BasePictureData(), parent=self.parents[self.level], caption=None data=PictureData(), parent=self.parents[self.level], caption=None
) )
else: else:
@ -412,7 +412,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
label=DocItemLabel.CAPTION, text=("".join(texts)).strip() label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
) )
doc.add_picture( doc.add_picture(
data=BasePictureData(), data=PictureData(),
parent=self.parents[self.level], parent=self.parents[self.level],
caption=fig_caption, caption=fig_caption,
) )
@ -420,5 +420,5 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
def handle_image(self, element, idx, doc): def handle_image(self, element, idx, doc):
"""Handles image tags (img).""" """Handles image tags (img)."""
doc.add_picture( doc.add_picture(
data=BasePictureData(), parent=self.parents[self.level], caption=None data=PictureData(), parent=self.parents[self.level], caption=None
) )

View File

@ -4,13 +4,13 @@ from pathlib import Path
from typing import Set, Union from typing import Set, Union
from docling_core.types.experimental import ( from docling_core.types.experimental import (
BasePictureData,
BaseTableData, BaseTableData,
DescriptionItem, DescriptionItem,
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,
DocumentOrigin, DocumentOrigin,
GroupLabel, GroupLabel,
PictureData,
ProvenanceItem, ProvenanceItem,
TableCell, TableCell,
) )
@ -204,7 +204,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
# shape has picture # shape has picture
prov = self.generate_prov(shape, slide_ind, "") prov = self.generate_prov(shape, slide_ind, "")
doc.add_picture( doc.add_picture(
data=BasePictureData(), parent=parent_slide, caption=None, prov=prov data=PictureData(), parent=parent_slide, caption=None, prov=prov
) )
return return

View File

@ -5,12 +5,12 @@ from typing import Set, Union
import docx import docx
from docling_core.types.experimental import ( from docling_core.types.experimental import (
BasePictureData,
BaseTableData, BaseTableData,
DescriptionItem, DescriptionItem,
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,
GroupLabel, GroupLabel,
PictureData,
TableCell, TableCell,
) )
from lxml import etree from lxml import etree
@ -419,6 +419,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
def handle_pictures(self, element, docx_obj, doc): def handle_pictures(self, element, docx_obj, doc):
doc.add_picture( doc.add_picture(
data=BasePictureData(), parent=self.parents[self.level], caption=None data=PictureData(), parent=self.parents[self.level], caption=None
) )
return return

View File

@ -3,7 +3,7 @@ from io import BytesIO
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
from docling_core.types.experimental import BoundingBox, Size from docling_core.types.experimental import BoundingBox, Size
from docling_core.types.experimental.document import BasePictureData, TableCell from docling_core.types.experimental.document import PictureData, TableCell
from docling_core.types.experimental.labels import DocItemLabel from docling_core.types.experimental.labels import DocItemLabel
from PIL.Image import Image from PIL.Image import Image
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
@ -109,7 +109,7 @@ class TextElement(BasePageElement): ...
class FigureElement(BasePageElement): class FigureElement(BasePageElement):
data: Optional[BasePictureData] = None data: Optional[PictureData] = None
provenance: Optional[str] = None provenance: Optional[str] = None
predicted_class: Optional[str] = None predicted_class: Optional[str] = None
confidence: Optional[float] = None confidence: Optional[float] = None

View File

@ -1,15 +1,14 @@
from typing import Any, Iterable from typing import Any, Iterable
from docling_core.types.experimental import DoclingDocument, NodeItem from docling_core.types.experimental import DoclingDocument, NodeItem
from docling_core.types.experimental.document import BasePictureData, PictureItem from docling_core.types.experimental.document import (
PictureClassificationData,
PictureItem,
)
from docling.models.base_model import BaseEnrichmentModel from docling.models.base_model import BaseEnrichmentModel
class DummyPictureData(BasePictureData):
hello: str
class DummyPictureClassifierEnrichmentModel(BaseEnrichmentModel): class DummyPictureClassifierEnrichmentModel(BaseEnrichmentModel):
def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool: def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
return isinstance(element, PictureItem) return isinstance(element, PictureItem)
@ -19,6 +18,10 @@ class DummyPictureClassifierEnrichmentModel(BaseEnrichmentModel):
) -> Iterable[Any]: ) -> Iterable[Any]:
for element in element_batch: for element in element_batch:
assert isinstance(element, PictureItem) assert isinstance(element, PictureItem)
element.data = DummyPictureData(hello="world") element.data.classification = PictureClassificationData(
provenance="dummy_classifier-0.0.1",
predicted_class="dummy",
confidence=0.42,
)
yield element yield element

16
poetry.lock generated
View File

@ -885,7 +885,7 @@ files = []
develop = false develop = false
[package.dependencies] [package.dependencies]
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "baceeaeaa690a12f717918d17336fcbfe414cbb8"} docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "8223654d87631ec61b9ec3570728e878d85d2ecf"}
docutils = "!=0.21" docutils = "!=0.21"
matplotlib = "^3.7.1" matplotlib = "^3.7.1"
networkx = "^3.1" networkx = "^3.1"
@ -909,8 +909,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
[package.source] [package.source]
type = "git" type = "git"
url = "https://github.com/DS4SD/deepsearch-glm.git" url = "https://github.com/DS4SD/deepsearch-glm.git"
reference = "af4557df1500d15f82a0e0c9d2a3b64afc3e6ac1" reference = "53874bd5c39bb3fe389663992b3efd3fedaf5697"
resolved_reference = "af4557df1500d15f82a0e0c9d2a3b64afc3e6ac1" resolved_reference = "53874bd5c39bb3fe389663992b3efd3fedaf5697"
[[package]] [[package]]
name = "dill" name = "dill"
@ -958,8 +958,8 @@ tabulate = "^0.9.0"
[package.source] [package.source]
type = "git" type = "git"
url = "https://github.com/DS4SD/docling-core.git" url = "https://github.com/DS4SD/docling-core.git"
reference = "baceeaeaa690a12f717918d17336fcbfe414cbb8" reference = "8223654d87631ec61b9ec3570728e878d85d2ecf"
resolved_reference = "baceeaeaa690a12f717918d17336fcbfe414cbb8" resolved_reference = "8223654d87631ec61b9ec3570728e878d85d2ecf"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@ -3440,9 +3440,9 @@ files = [
[package.dependencies] [package.dependencies]
numpy = [ numpy = [
{version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
] ]
[[package]] [[package]]
@ -3576,8 +3576,8 @@ files = [
[package.dependencies] [package.dependencies]
numpy = [ numpy = [
{version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
] ]
python-dateutil = ">=2.8.2" python-dateutil = ">=2.8.2"
pytz = ">=2020.1" pytz = ">=2020.1"
@ -7107,4 +7107,4 @@ tesserocr = ["tesserocr"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "76695cfbcb87589dc2d8bc05b42969d558962122a9375e62ce68eed39cb0e634" content-hash = "d09e865ced8e4de077898f499cfd6e487b655e25ac2fe34b2159d91cb85b5238"

View File

@ -37,9 +37,9 @@ torchvision = [
###################### ######################
python = "^3.10" python = "^3.10"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "baceeaeaa690a12f717918d17336fcbfe414cbb8"} docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "8223654d87631ec61b9ec3570728e878d85d2ecf"}
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"} docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "af4557df1500d15f82a0e0c9d2a3b64afc3e6ac1"} deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "53874bd5c39bb3fe389663992b3efd3fedaf5697"}
docling-parse = "^1.5.1" docling-parse = "^1.5.1"
filetype = "^1.2.0" filetype = "^1.2.0"