mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
Proceed with processing the content of a single-cell table as if it's just part of the body
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
f7b58dfa51
commit
b46ae1af56
@ -130,7 +130,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
|
||||
for element in body:
|
||||
tag_name = etree.QName(element).localname
|
||||
|
||||
# Check for Inline Images (drawings or blip elements)
|
||||
found_drawing = etree.ElementBase.xpath(
|
||||
element, ".//w:drawing", namespaces=self.xml_namespaces
|
||||
@ -164,8 +163,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
return default
|
||||
|
||||
def get_numId_and_ilvl(self, paragraph):
|
||||
if not hasattr(paragraph._element, "find"):
|
||||
return None, None
|
||||
# Access the XML element of the paragraph
|
||||
numPr = paragraph._element.find(
|
||||
".//w:numPr", namespaces=paragraph._element.nsmap
|
||||
@ -448,17 +445,13 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
||||
for row in table.rows:
|
||||
# Calculate the max number of columns
|
||||
num_cols = max(num_cols, sum(get_colspan(cell) for cell in row.cells))
|
||||
# if row.cells:
|
||||
# num_cols = max(num_cols, len(row.cells))
|
||||
|
||||
print("num_rows = {}, num_cols = {}".format(num_rows, num_cols))
|
||||
if num_rows == 1:
|
||||
if num_cols == 1:
|
||||
cell_element = table.rows[0].cells[0]
|
||||
for paragraph in cell_element.paragraphs:
|
||||
# print(paragraph.text)
|
||||
self.handle_text_elements(paragraph, docx_obj, doc)
|
||||
return
|
||||
if num_rows == 1 and num_cols == 1:
|
||||
cell_element = table.rows[0].cells[0]
|
||||
# In case we have a table of only 1 cell, we consider it furniture
|
||||
# And proceed processing the content of the cell as though it's in the document body
|
||||
self.walk_linear(cell_element._element, docx_obj, doc)
|
||||
return
|
||||
|
||||
# Initialize the table grid
|
||||
table_grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
|
||||
|
Loading…
Reference in New Issue
Block a user