From af4aaa28af83fd6e829448d75d0ac2b14bd18dcf Mon Sep 17 00:00:00 2001
From: Michael Krissgau <michael.krissgau@ibm.com>
Date: Thu, 22 May 2025 17:45:15 +0200
Subject: [PATCH] fix(msword_backend): Identify text in the same line after an
 image / image anchor #1425

Signed-off-by: Michael Krissgau <michael.krissgau@ibm.com>
---
 docling/backend/msword_backend.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py
index 6cfa0860..9a4d0396 100644
--- a/docling/backend/msword_backend.py
+++ b/docling/backend/msword_backend.py
@@ -253,9 +253,15 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
                     self._handle_tables(element, docx_obj, doc)
                 except Exception:
                     _log.debug("could not parse a table, broken docx table")
-
+            # Check for Image
             elif drawing_blip:
                 self._handle_pictures(docx_obj, drawing_blip, doc)
+                # Check for Text after the Image
+                if (
+                    tag_name in ["p"]
+                    or element.find(".//w:p", namespaces=namespaces) is not None
+                ):
+                    self._handle_text_elements(element, docx_obj, doc)
             # Check for the sdt containers, like table of contents
             elif tag_name in ["sdt"]:
                 sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)