mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix: enrichment of documents without pages metadata (pptx and xlsx) (#2401)
fix logic for pptx and xlsx Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -173,11 +173,11 @@ class BaseItemAndImageEnrichmentModel(
|
||||
assert isinstance(element, DocItem)
|
||||
|
||||
# Allow the case of documents without page images but embedded images (e.g. Word and HTML docs)
|
||||
if len(element.prov) == 0 and isinstance(element, PictureItem):
|
||||
if isinstance(element, PictureItem):
|
||||
embedded_im = element.get_image(conv_res.document)
|
||||
if embedded_im is not None:
|
||||
return ItemAndImageEnrichmentElement(item=element, image=embedded_im)
|
||||
else:
|
||||
elif len(element.prov) == 0:
|
||||
return None
|
||||
|
||||
# Crop the image from the page
|
||||
|
||||
Reference in New Issue
Block a user