mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
apply changes to the picture data annotations
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
dd2982cce1
commit
d5f161d0f5
@ -7,7 +7,6 @@ from bs4 import BeautifulSoup
|
|||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DescriptionItem,
|
DescriptionItem,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
PictureData,
|
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
@ -406,9 +405,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
contains_captions = element.find(["figcaption"])
|
contains_captions = element.find(["figcaption"])
|
||||||
if contains_captions is None:
|
if contains_captions is None:
|
||||||
doc.add_picture(
|
doc.add_picture(parent=self.parents[self.level], caption=None)
|
||||||
data=PictureData(), parent=self.parents[self.level], caption=None
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
texts = []
|
texts = []
|
||||||
@ -419,13 +416,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
|
label=DocItemLabel.CAPTION, text=("".join(texts)).strip()
|
||||||
)
|
)
|
||||||
doc.add_picture(
|
doc.add_picture(
|
||||||
data=PictureData(),
|
|
||||||
parent=self.parents[self.level],
|
parent=self.parents[self.level],
|
||||||
caption=fig_caption,
|
caption=fig_caption,
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle_image(self, element, idx, doc):
|
def handle_image(self, element, idx, doc):
|
||||||
"""Handles image tags (img)."""
|
"""Handles image tags (img)."""
|
||||||
doc.add_picture(
|
doc.add_picture(parent=self.parents[self.level], caption=None)
|
||||||
data=PictureData(), parent=self.parents[self.level], caption=None
|
|
||||||
)
|
|
||||||
|
@ -9,7 +9,6 @@ from docling_core.types.doc import (
|
|||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
DocumentOrigin,
|
DocumentOrigin,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
PictureData,
|
|
||||||
ProvenanceItem,
|
ProvenanceItem,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
@ -243,9 +242,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
||||||
# shape has picture
|
# shape has picture
|
||||||
prov = self.generate_prov(shape, slide_ind, "")
|
prov = self.generate_prov(shape, slide_ind, "")
|
||||||
doc.add_picture(
|
doc.add_picture(parent=parent_slide, caption=None, prov=prov)
|
||||||
data=PictureData(), parent=parent_slide, caption=None, prov=prov
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
||||||
|
@ -9,7 +9,6 @@ from docling_core.types.doc import (
|
|||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
PictureData,
|
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
@ -492,7 +491,5 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
return
|
return
|
||||||
|
|
||||||
def handle_pictures(self, element, docx_obj, doc):
|
def handle_pictures(self, element, docx_obj, doc):
|
||||||
doc.add_picture(
|
doc.add_picture(parent=self.parents[self.level], caption=None)
|
||||||
data=PictureData(), parent=self.parents[self.level], caption=None
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
@ -3,7 +3,7 @@ from io import BytesIO
|
|||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
|
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, Size
|
from docling_core.types.doc import BoundingBox, Size
|
||||||
from docling_core.types.doc.document import PictureData, TableCell
|
from docling_core.types.doc.document import PictureDataType, TableCell
|
||||||
from docling_core.types.doc.labels import DocItemLabel
|
from docling_core.types.doc.labels import DocItemLabel
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
@ -131,7 +131,7 @@ class TextElement(BasePageElement):
|
|||||||
|
|
||||||
|
|
||||||
class FigureElement(BasePageElement):
|
class FigureElement(BasePageElement):
|
||||||
data: Optional[PictureData] = None
|
annotations: List[PictureDataType] = []
|
||||||
provenance: Optional[str] = None
|
provenance: Optional[str] = None
|
||||||
predicted_class: Optional[str] = None
|
predicted_class: Optional[str] = None
|
||||||
confidence: Optional[float] = None
|
confidence: Optional[float] = None
|
||||||
|
@ -36,10 +36,10 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
|||||||
# uncomment this to interactively visualize the image
|
# uncomment this to interactively visualize the image
|
||||||
# element.image.pil_image.show()
|
# element.image.pil_image.show()
|
||||||
|
|
||||||
element.data.classification = PictureClassificationData(
|
element.annotations.append(PictureClassificationData(
|
||||||
provenance="example_classifier-0.0.1",
|
provenance="example_classifier-0.0.1",
|
||||||
predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
|
predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
|
||||||
)
|
))
|
||||||
|
|
||||||
yield element
|
yield element
|
||||||
|
|
||||||
@ -83,7 +83,7 @@ def main():
|
|||||||
for element, _level in result.document.iterate_items():
|
for element, _level in result.document.iterate_items():
|
||||||
if isinstance(element, PictureItem):
|
if isinstance(element, PictureItem):
|
||||||
print(
|
print(
|
||||||
f"The model populated the `data` portion of picture {element.self_ref}:\n{element.data}"
|
f"The model populated the `data` portion of picture {element.self_ref}:\n{element.annotations}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
12
poetry.lock
generated
12
poetry.lock
generated
@ -898,7 +898,7 @@ files = []
|
|||||||
develop = false
|
develop = false
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
|
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
|
||||||
docutils = "!=0.21"
|
docutils = "!=0.21"
|
||||||
matplotlib = "^3.7.1"
|
matplotlib = "^3.7.1"
|
||||||
networkx = "^3.1"
|
networkx = "^3.1"
|
||||||
@ -922,8 +922,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/DS4SD/deepsearch-glm.git"
|
url = "https://github.com/DS4SD/deepsearch-glm.git"
|
||||||
reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
|
reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
|
||||||
resolved_reference = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"
|
resolved_reference = "f219bbfb8065e787b481d6b12ca22db8e31e865e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "defusedxml"
|
name = "defusedxml"
|
||||||
@ -982,8 +982,8 @@ tabulate = "^0.9.0"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/DS4SD/docling-core.git"
|
url = "https://github.com/DS4SD/docling-core.git"
|
||||||
reference = "33aa21408400c9c475db0f8c6be681b888388284"
|
reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
|
||||||
resolved_reference = "33aa21408400c9c475db0f8c6be681b888388284"
|
resolved_reference = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-ibm-models"
|
name = "docling-ibm-models"
|
||||||
@ -7496,4 +7496,4 @@ tesserocr = ["tesserocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "70620592368cfa1a6a8a7e32e1f98f5f9f253f0d99f7a8bdfb6c46a0363b2408"
|
content-hash = "3994b9c2200bb9827c76d84128fd7bbe1c1cc6f8e6cf1e34f9923c5511bc324a"
|
||||||
|
@ -37,9 +37,9 @@ torchvision = [
|
|||||||
######################
|
######################
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "33aa21408400c9c475db0f8c6be681b888388284"}
|
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "002f784745bf2e2bcf9def81d070c59f2e7c61c2"}
|
||||||
docling-ibm-models = "^2.0.1"
|
docling-ibm-models = "^2.0.1"
|
||||||
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "8ab1b4372122c820a28badd3c6095c2ce2feaf61"}
|
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "f219bbfb8065e787b481d6b12ca22db8e31e865e"}
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypdfium2 = "^4.30.0"
|
pypdfium2 = "^4.30.0"
|
||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user