mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix(msword_backend): Identify text in the same line after an image / image anchor #1425
Signed-off-by: Michael Krissgau <michael.krissgau@ibm.com>
This commit is contained in:
parent
45265bf8b1
commit
af4aaa28af
@ -253,9 +253,15 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
self._handle_tables(element, docx_obj, doc)
|
||||
except Exception:
|
||||
_log.debug("could not parse a table, broken docx table")
|
||||
|
||||
# Check for Image
|
||||
elif drawing_blip:
|
||||
self._handle_pictures(docx_obj, drawing_blip, doc)
|
||||
# Check for Text after the Image
|
||||
if (
|
||||
tag_name in ["p"]
|
||||
or element.find(".//w:p", namespaces=namespaces) is not None
|
||||
):
|
||||
self._handle_text_elements(element, docx_obj, doc)
|
||||
# Check for the sdt containers, like table of contents
|
||||
elif tag_name in ["sdt"]:
|
||||
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
||||
|
Loading…
Reference in New Issue
Block a user