feat: Rich tables for MSWord backend (#2291)

* Adding support of rich table cells to MSWord backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Fixes for properly accounting lists, pictures and headers in rich table cells

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up msword backend, re-generated docx tests

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Added detection of simple table cells in word backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

---------

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maxim Lysak
2025-09-22 16:41:59 +02:00
committed by GitHub
parent 46efaaefee
commit e2482a2ada
27 changed files with 1103 additions and 787 deletions

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "textbox",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -491,7 +491,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
"text": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
@@ -510,7 +510,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
"text": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
@@ -529,7 +529,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -541,7 +541,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Student falls ill",
"text": "Student falls ill",
@@ -560,7 +560,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -593,7 +593,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -605,7 +605,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -617,7 +617,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
"text": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
@@ -636,7 +636,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -648,7 +648,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -660,7 +660,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -672,7 +672,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -684,7 +684,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -703,7 +703,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -715,7 +715,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -769,7 +769,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -781,7 +781,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -793,7 +793,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -805,7 +805,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -817,7 +817,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -829,7 +829,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -841,7 +841,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Health Bureau:",
"text": "Health Bureau:",
@@ -860,7 +860,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
"text": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
@@ -921,7 +921,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -933,7 +933,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -945,7 +945,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
"text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
@@ -964,7 +964,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -976,7 +976,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -988,7 +988,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1000,7 +1000,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1012,7 +1012,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1024,7 +1024,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1036,7 +1036,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1048,7 +1048,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "The Health Bureau will handle",
"text": "The Health Bureau will handle",
@@ -1067,7 +1067,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "reporting and specimen collection",
"text": "reporting and specimen collection",
@@ -1086,7 +1086,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": ".",
"text": ".",
@@ -1105,7 +1105,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1117,7 +1117,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1129,7 +1129,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1141,7 +1141,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1153,7 +1153,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Whether the epidemic has eased.",
"text": "Whether the epidemic has eased.",
@@ -1172,7 +1172,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1184,7 +1184,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1196,7 +1196,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Whether the test results are positive for a legally designated infectious disease.",
"text": "Whether the test results are positive for a legally designated infectious disease.",
@@ -1215,7 +1215,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "No",
"text": "No",
@@ -1234,7 +1234,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1246,7 +1246,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1258,7 +1258,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -1277,7 +1277,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1289,7 +1289,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -1308,7 +1308,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1320,7 +1320,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1332,7 +1332,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Case closed.",
"text": "Case closed.",
@@ -1351,7 +1351,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1363,7 +1363,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
"text": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
@@ -1382,7 +1382,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1394,7 +1394,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "No",
"text": "No",
@@ -1413,7 +1413,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1425,7 +1425,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1437,7 +1437,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""