mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
fix: Empty table handling (#2365)
* Add raw table cells when no table structure model was used
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* Add RichTableCell instance for tables with missing structure.
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update test GT
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update test results
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -216,8 +216,13 @@ class ReadingOrderModel:
|
|||||||
elif isinstance(element, Table):
|
elif isinstance(element, Table):
|
||||||
# Check if table has no structure prediction
|
# Check if table has no structure prediction
|
||||||
if element.num_rows == 0 and element.num_cols == 0:
|
if element.num_rows == 0 and element.num_cols == 0:
|
||||||
# Create minimal 1x1 table with rich cell containing all children
|
# Only create 1x1 table if there are children to put in it
|
||||||
tbl_data = TableData(num_rows=1, num_cols=1, table_cells=[])
|
if element.cluster.children:
|
||||||
|
# Create minimal 1x1 table with rich cell containing all children
|
||||||
|
tbl_data = TableData(num_rows=1, num_cols=1, table_cells=[])
|
||||||
|
else:
|
||||||
|
# Create empty table with no structure
|
||||||
|
tbl_data = TableData(num_rows=0, num_cols=0, table_cells=[])
|
||||||
else:
|
else:
|
||||||
tbl_data = TableData(
|
tbl_data = TableData(
|
||||||
num_rows=element.num_rows,
|
num_rows=element.num_rows,
|
||||||
@@ -253,8 +258,12 @@ class ReadingOrderModel:
|
|||||||
|
|
||||||
tbl.footnotes.append(new_footnote_item.get_ref())
|
tbl.footnotes.append(new_footnote_item.get_ref())
|
||||||
|
|
||||||
# Handle case where table has no structure prediction
|
# Handle case where table has no structure prediction but has children
|
||||||
if element.num_rows == 0 and element.num_cols == 0:
|
if (
|
||||||
|
element.num_rows == 0
|
||||||
|
and element.num_cols == 0
|
||||||
|
and element.cluster.children
|
||||||
|
):
|
||||||
# Create rich cell containing all child elements
|
# Create rich cell containing all child elements
|
||||||
rich_cell_ref = self._create_rich_cell_group(element, out_doc, tbl)
|
rich_cell_ref = self._create_rich_cell_group(element, out_doc, tbl)
|
||||||
|
|
||||||
|
|||||||
@@ -218,7 +218,7 @@
|
|||||||
<picture><loc_67><loc_92><loc_205><loc_308></picture>
|
<picture><loc_67><loc_92><loc_205><loc_308></picture>
|
||||||
<caption><loc_52><loc_317><loc_223><loc_323>Figure 8: Example of a table with multi-line header.</caption>
|
<caption><loc_52><loc_317><loc_223><loc_323>Figure 8: Example of a table with multi-line header.</caption>
|
||||||
<picture><loc_252><loc_59><loc_455><loc_185><caption><loc_252><loc_194><loc_445><loc_207>Figure 9: Example of a table with big empty distance between cells.</caption></picture>
|
<picture><loc_252><loc_59><loc_455><loc_185><caption><loc_252><loc_194><loc_445><loc_207>Figure 9: Example of a table with big empty distance between cells.</caption></picture>
|
||||||
<otsl><loc_274><loc_286><loc_400><loc_317><ecel><nl></otsl>
|
<otsl><loc_274><loc_286><loc_400><loc_317></otsl>
|
||||||
<picture><loc_273><loc_239><loc_424><loc_420><caption><loc_255><loc_430><loc_443><loc_435>Figure 10: Example of a complex table with empty cells.</caption></picture>
|
<picture><loc_273><loc_239><loc_424><loc_420><caption><loc_255><loc_430><loc_443><loc_435>Figure 10: Example of a complex table with empty cells.</caption></picture>
|
||||||
<page_footer><loc_239><loc_464><loc_247><loc_469>13</page_footer>
|
<page_footer><loc_239><loc_464><loc_247><loc_469>13</page_footer>
|
||||||
<page_break>
|
<page_break>
|
||||||
|
|||||||
@@ -829,16 +829,6 @@
|
|||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"name": "list",
|
"name": "list",
|
||||||
"label": "list"
|
"label": "list"
|
||||||
},
|
|
||||||
{
|
|
||||||
"self_ref": "#/groups/12",
|
|
||||||
"parent": {
|
|
||||||
"$ref": "#/tables/6"
|
|
||||||
},
|
|
||||||
"children": [],
|
|
||||||
"content_layer": "body",
|
|
||||||
"name": "rich_cell_group_7_0_0",
|
|
||||||
"label": "unspecified"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"texts": [
|
"texts": [
|
||||||
@@ -25418,11 +25408,7 @@
|
|||||||
"parent": {
|
"parent": {
|
||||||
"$ref": "#/body"
|
"$ref": "#/body"
|
||||||
},
|
},
|
||||||
"children": [
|
"children": [],
|
||||||
{
|
|
||||||
"$ref": "#/groups/12"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
"label": "table",
|
"label": "table",
|
||||||
"prov": [
|
"prov": [
|
||||||
@@ -25445,43 +25431,10 @@
|
|||||||
"references": [],
|
"references": [],
|
||||||
"footnotes": [],
|
"footnotes": [],
|
||||||
"data": {
|
"data": {
|
||||||
"table_cells": [
|
"table_cells": [],
|
||||||
{
|
"num_rows": 0,
|
||||||
"row_span": 1,
|
"num_cols": 0,
|
||||||
"col_span": 1,
|
"grid": []
|
||||||
"start_row_offset_idx": 0,
|
|
||||||
"end_row_offset_idx": 1,
|
|
||||||
"start_col_offset_idx": 0,
|
|
||||||
"end_col_offset_idx": 1,
|
|
||||||
"text": "",
|
|
||||||
"column_header": false,
|
|
||||||
"row_header": false,
|
|
||||||
"row_section": false,
|
|
||||||
"fillable": false,
|
|
||||||
"ref": {
|
|
||||||
"$ref": "#/groups/12"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"num_rows": 1,
|
|
||||||
"num_cols": 1,
|
|
||||||
"grid": [
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"row_span": 1,
|
|
||||||
"col_span": 1,
|
|
||||||
"start_row_offset_idx": 0,
|
|
||||||
"end_row_offset_idx": 1,
|
|
||||||
"start_col_offset_idx": 0,
|
|
||||||
"end_col_offset_idx": 1,
|
|
||||||
"text": "",
|
|
||||||
"column_header": false,
|
|
||||||
"row_header": false,
|
|
||||||
"row_section": false,
|
|
||||||
"fillable": false
|
|
||||||
}
|
|
||||||
]
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"annotations": []
|
"annotations": []
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -367,9 +367,6 @@ Figure 9: Example of a table with big empty distance between cells.
|
|||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
|
|
||||||
| |
|
|
||||||
|----|
|
|
||||||
|
|
||||||
Figure 10: Example of a complex table with empty cells.
|
Figure 10: Example of a complex table with empty cells.
|
||||||
|
|
||||||
<!-- image -->
|
<!-- image -->
|
||||||
|
|||||||
Reference in New Issue
Block a user