mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Added captions for the images for SmolDocling assembly code, improved provenance definition for all elements
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
d7abe1b1cd
commit
b1df461ca8
@ -399,6 +399,10 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
doc.add_table(data=table_data)
|
doc.add_table(data=table_data)
|
||||||
|
|
||||||
elif tag_name == "picture":
|
elif tag_name == "picture":
|
||||||
|
text_caption_content = extract_inner_text(full_chunk)
|
||||||
|
print("----------- TEXT CONTENT OF A PICTURE TAG -------------")
|
||||||
|
print(text_caption_content)
|
||||||
|
print("-------------------------------------------------------")
|
||||||
if image:
|
if image:
|
||||||
if bbox:
|
if bbox:
|
||||||
width, height = image.size
|
width, height = image.size
|
||||||
@ -409,7 +413,7 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
int(bbox.b * height),
|
int(bbox.b * height),
|
||||||
)
|
)
|
||||||
cropped_image = image.crop(crop_box)
|
cropped_image = image.crop(crop_box)
|
||||||
doc.add_picture(
|
pic = doc.add_picture(
|
||||||
parent=None,
|
parent=None,
|
||||||
image=ImageRef.from_pil(image=cropped_image, dpi=72),
|
image=ImageRef.from_pil(image=cropped_image, dpi=72),
|
||||||
prov=(
|
prov=(
|
||||||
@ -418,18 +422,35 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
# If there is a caption to an image, add it as well
|
||||||
|
if len(text_caption_content) > 0:
|
||||||
|
caption_item = doc.add_text(
|
||||||
|
label=DocItemLabel.CAPTION,
|
||||||
|
text=text_caption_content,
|
||||||
|
parent=None,
|
||||||
|
)
|
||||||
|
pic.captions.append(caption_item.get_ref())
|
||||||
else:
|
else:
|
||||||
if bbox:
|
if bbox:
|
||||||
|
# In case we don't have access to a binary of an image
|
||||||
doc.add_picture(
|
doc.add_picture(
|
||||||
parent=None,
|
parent=None,
|
||||||
prov=ProvenanceItem(
|
prov=ProvenanceItem(
|
||||||
bbox=bbox, charspan=(0, 0), page_no=page_no
|
bbox=bbox, charspan=(0, 0), page_no=page_no
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
# If there is a caption to an image, add it as well
|
||||||
|
if len(text_caption_content) > 0:
|
||||||
|
caption_item = doc.add_text(
|
||||||
|
label=DocItemLabel.CAPTION,
|
||||||
|
text=text_caption_content,
|
||||||
|
parent=None,
|
||||||
|
)
|
||||||
|
pic.captions.append(caption_item.get_ref())
|
||||||
else:
|
else:
|
||||||
# For everything else, treat as text
|
# For everything else, treat as text
|
||||||
if self.force_backend_text:
|
if self.force_backend_text:
|
||||||
content = extract_text_from_backend(page, bbox)
|
text_content = extract_text_from_backend(page, bbox)
|
||||||
else:
|
else:
|
||||||
text_content = extract_inner_text(full_chunk)
|
text_content = extract_inner_text(full_chunk)
|
||||||
# If it's code, wrap it with <pre><code> tags
|
# If it's code, wrap it with <pre><code> tags
|
||||||
@ -439,7 +460,11 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
label=doc_label,
|
label=doc_label,
|
||||||
text=text_content,
|
text=text_content,
|
||||||
prov=(
|
prov=(
|
||||||
ProvenanceItem(bbox=bbox, charspan=(0, 0), page_no=page_no)
|
ProvenanceItem(
|
||||||
|
bbox=bbox,
|
||||||
|
charspan=(0, len(text_content)),
|
||||||
|
page_no=page_no,
|
||||||
|
)
|
||||||
if bbox
|
if bbox
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
Loading…
Reference in New Issue
Block a user