mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
fix: find paragraphs in elements with images in docx
Signed-off-by: Manuel030 <manuelenrique.plank@gmail.com>
This commit is contained in:
parent
d8959c6b19
commit
387dd659c1
@ -123,6 +123,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
doc = DoclingDocument(name=self.file.stem or "file", origin=origin)
|
||||
if self.is_valid():
|
||||
assert self.docx_obj is not None
|
||||
|
||||
doc = self._walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
|
||||
return doc
|
||||
else:
|
||||
@ -188,6 +189,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
elif drawing_blip:
|
||||
self._handle_pictures(docx_obj, drawing_blip, doc)
|
||||
self._handle_text_elements(element, docx_obj, doc)
|
||||
# Check for the sdt containers, like table of contents
|
||||
elif tag_name in ["sdt"]:
|
||||
sdt_content = element.find(".//w:sdtContent", namespaces=namespaces)
|
||||
|
Loading…
Reference in New Issue
Block a user