mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix: enrichment of documents without pages metadata (pptx and xlsx) (#2401)
fix logic for pptx and xlsx

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -173,11 +173,11 @@ class BaseItemAndImageEnrichmentModel(
|
|||||||
assert isinstance(element, DocItem)
|
assert isinstance(element, DocItem)
|
||||||
|
|
||||||
# Allow the case of documents without page images but embedded images (e.g. Word and HTML docs)
|
# Allow the case of documents without page images but embedded images (e.g. Word and HTML docs)
|
||||||
if len(element.prov) == 0 and isinstance(element, PictureItem):
|
if isinstance(element, PictureItem):
|
||||||
embedded_im = element.get_image(conv_res.document)
|
embedded_im = element.get_image(conv_res.document)
|
||||||
if embedded_im is not None:
|
if embedded_im is not None:
|
||||||
return ItemAndImageEnrichmentElement(item=element, image=embedded_im)
|
return ItemAndImageEnrichmentElement(item=element, image=embedded_im)
|
||||||
else:
|
elif len(element.prov) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Crop the image from the page
|
# Crop the image from the page
|
||||||
|
|||||||
Reference in New Issue
Block a user