fix: Empty table handling (#2365)

* Add raw table cells when no table structure model was used

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Add RichTableCell instance for tables with missing structure.

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update test ground truth (GT)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update test results

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-10-02 19:35:16 +02:00
committed by GitHub
parent e6c3b05e63
commit ca2be7ff3a
4 changed files with 19 additions and 60 deletions

View File

@@ -218,7 +218,7 @@
<picture><loc_67><loc_92><loc_205><loc_308></picture>
<caption><loc_52><loc_317><loc_223><loc_323>Figure 8: Example of a table with multi-line header.</caption>
<picture><loc_252><loc_59><loc_455><loc_185><caption><loc_252><loc_194><loc_445><loc_207>Figure 9: Example of a table with big empty distance between cells.</caption></picture>
<otsl><loc_274><loc_286><loc_400><loc_317><ecel><nl></otsl>
<otsl><loc_274><loc_286><loc_400><loc_317></otsl>
<picture><loc_273><loc_239><loc_424><loc_420><caption><loc_255><loc_430><loc_443><loc_435>Figure 10: Example of a complex table with empty cells.</caption></picture>
<page_footer><loc_239><loc_464><loc_247><loc_469>13</page_footer>
<page_break>

View File

@@ -829,16 +829,6 @@
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/12",
"parent": {
"$ref": "#/tables/6"
},
"children": [],
"content_layer": "body",
"name": "rich_cell_group_7_0_0",
"label": "unspecified"
}
],
"texts": [
@@ -25418,11 +25408,7 @@
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/groups/12"
}
],
"children": [],
"content_layer": "body",
"label": "table",
"prov": [
@@ -25445,43 +25431,10 @@
"references": [],
"footnotes": [],
"data": {
"table_cells": [
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"row_header": false,
"row_section": false,
"fillable": false,
"ref": {
"$ref": "#/groups/12"
}
}
],
"num_rows": 1,
"num_cols": 1,
"grid": [
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"row_header": false,
"row_section": false,
"fillable": false
}
]
]
"table_cells": [],
"num_rows": 0,
"num_cols": 0,
"grid": []
},
"annotations": []
}

View File

@@ -367,9 +367,6 @@ Figure 9: Example of a table with big empty distance between cells.
<!-- image -->
| |
|----|
Figure 10: Example of a complex table with empty cells.
<!-- image -->