fix: add table raw content when no table structure model is used (#1815)

* add table raw cells when no table structure model was used

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Add RichTableCell instance for tables with missing structure.

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update test GT

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-10-02 13:46:42 +02:00
committed by GitHub
parent f0b630e24e
commit 4f295ed051
34 changed files with 6835 additions and 3389 deletions

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "powerpoint_sample",
"origin": {
"mimetype": "application/vnd.ms-powerpoint",
@@ -1022,7 +1022,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1034,7 +1035,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1046,7 +1048,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1058,7 +1061,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1070,7 +1074,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1082,7 +1087,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1094,7 +1100,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1106,7 +1113,8 @@
"text": "R1",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1118,7 +1126,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1130,7 +1139,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1142,7 +1152,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1154,7 +1165,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1166,7 +1178,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1178,7 +1191,8 @@
"text": "R2",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1190,7 +1204,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1202,7 +1217,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 2,
@@ -1214,7 +1230,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1226,7 +1243,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1238,7 +1256,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1250,7 +1269,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1262,7 +1282,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 3,
@@ -1274,7 +1295,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1286,7 +1308,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1298,7 +1321,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1310,7 +1334,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1322,7 +1347,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1334,7 +1360,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1346,7 +1373,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1358,7 +1386,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1370,7 +1399,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1382,7 +1412,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1394,7 +1425,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1406,7 +1438,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1418,7 +1451,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 9,
@@ -1435,7 +1469,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1447,7 +1482,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1459,7 +1495,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1471,7 +1508,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1483,7 +1521,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1495,7 +1534,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1507,7 +1547,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1521,7 +1562,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1533,7 +1575,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1545,7 +1588,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1557,7 +1601,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1569,7 +1614,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1581,7 +1627,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1593,7 +1640,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1607,7 +1655,8 @@
"text": "R1",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1619,7 +1668,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1631,7 +1681,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1643,7 +1694,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1655,7 +1707,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1667,7 +1720,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1679,7 +1733,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1693,7 +1748,8 @@
"text": "R2",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1705,7 +1761,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1717,7 +1774,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1729,7 +1787,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1741,7 +1800,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1753,7 +1813,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1765,7 +1826,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1779,7 +1841,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1791,7 +1854,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1803,7 +1867,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1815,7 +1880,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1827,7 +1893,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1839,7 +1906,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1851,7 +1919,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1865,7 +1934,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1877,7 +1947,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1889,7 +1960,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1901,7 +1973,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1913,7 +1986,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1925,7 +1999,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1937,7 +2012,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1951,7 +2027,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1963,7 +2040,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1975,7 +2053,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1987,7 +2066,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1999,7 +2079,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2011,7 +2092,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2023,7 +2105,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2037,7 +2120,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2049,7 +2133,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2061,7 +2146,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2073,7 +2159,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2085,7 +2172,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2097,7 +2185,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2109,7 +2198,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2123,7 +2213,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2135,7 +2226,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2147,7 +2239,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2159,7 +2252,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2171,7 +2265,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2183,7 +2278,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2195,7 +2291,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]