fix: enrichment of documents without pages metadata (pptx and xlsx) (#2401)

Fix the enrichment logic for PPTX and XLSX documents.

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-12-08 12:48:28 +00:00 · 2025-10-07 18:28:51 +02:00
parent 9705f4020c
commit 0610d01afa
1 changed file with 2 additions and 2 deletions
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@@ -173,11 +173,11 @@ class BaseItemAndImageEnrichmentModel(
        assert isinstance(element, DocItem)

        # Allow the case of documents without page images but embedded images (e.g. Word and HTML docs)
-        if len(element.prov) == 0 and isinstance(element, PictureItem):
+        if isinstance(element, PictureItem):
            embedded_im = element.get_image(conv_res.document)
            if embedded_im is not None:
                return ItemAndImageEnrichmentElement(item=element, image=embedded_im)
-            else:
+            elif len(element.prov) == 0:
                return None

        # Crop the image form the page