From 0610d01afae6e3a1c88c85894be52503d7c873e4 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Tue, 7 Oct 2025 18:28:51 +0200 Subject: [PATCH] fix: enrichment of documents without pages metadata (pptx and xlsx) (#2401) fix logic for pptx and xlsx Signed-off-by: Michele Dolfi --- docling/models/base_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 5d443c7b..45f1503f 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -173,11 +173,11 @@ class BaseItemAndImageEnrichmentModel( assert isinstance(element, DocItem) # Allow the case of documents without page images but embedded images (e.g. Word and HTML docs) - if len(element.prov) == 0 and isinstance(element, PictureItem): + if isinstance(element, PictureItem): embedded_im = element.get_image(conv_res.document) if embedded_im is not None: return ItemAndImageEnrichmentElement(item=element, image=embedded_im) - else: + elif len(element.prov) == 0: return None # Crop the image form the page