From 0610d01afae6e3a1c88c85894be52503d7c873e4 Mon Sep 17 00:00:00 2001
From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Date: Tue, 7 Oct 2025 18:28:51 +0200
Subject: [PATCH] fix: enrichment of documents without pages metadata (pptx and
 xlsx) (#2401)

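Fix the enrichment logic for PPTX and XLSX documents: always try the embedded
image of a PictureItem first, and return None only when no embedded image
exists and the item has no provenance; otherwise fall back to cropping the
image from the page.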

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---
 docling/models/base_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docling/models/base_model.py b/docling/models/base_model.py
index 5d443c7b..45f1503f 100644
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@@ -173,11 +173,11 @@ class BaseItemAndImageEnrichmentModel(
         assert isinstance(element, DocItem)
 
         # Allow the case of documents without page images but embedded images (e.g. Word and HTML docs)
-        if len(element.prov) == 0 and isinstance(element, PictureItem):
+        if isinstance(element, PictureItem):
             embedded_im = element.get_image(conv_res.document)
             if embedded_im is not None:
                 return ItemAndImageEnrichmentElement(item=element, image=embedded_im)
-            else:
+            elif len(element.prov) == 0:
                 return None
 
         # Crop the image form the page