diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py index 68fe54b9..a25ca3db 100644 --- a/docling/models/readingorder_model.py +++ b/docling/models/readingorder_model.py @@ -216,8 +216,13 @@ class ReadingOrderModel: elif isinstance(element, Table): # Check if table has no structure prediction if element.num_rows == 0 and element.num_cols == 0: - # Create minimal 1x1 table with rich cell containing all children - tbl_data = TableData(num_rows=1, num_cols=1, table_cells=[]) + # Only create 1x1 table if there are children to put in it + if element.cluster.children: + # Create minimal 1x1 table with rich cell containing all children + tbl_data = TableData(num_rows=1, num_cols=1, table_cells=[]) + else: + # Create empty table with no structure + tbl_data = TableData(num_rows=0, num_cols=0, table_cells=[]) else: tbl_data = TableData( num_rows=element.num_rows, @@ -253,8 +258,12 @@ class ReadingOrderModel: tbl.footnotes.append(new_footnote_item.get_ref()) - # Handle case where table has no structure prediction - if element.num_rows == 0 and element.num_cols == 0: + # Handle case where table has no structure prediction but has children + if ( + element.num_rows == 0 + and element.num_cols == 0 + and element.cluster.children + ): # Create rich cell containing all child elements rich_cell_ref = self._create_rich_cell_group(element, out_doc, tbl) diff --git a/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt b/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt index 7d5e5ad9..fc699de5 100644 --- a/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt +++ b/tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt @@ -218,7 +218,7 @@ Figure 8: Example of a table with multi-line header. Figure 9: Example of a table with big empty distance between cells. - + Figure 10: Example of a complex table with empty cells. 13 diff --git a/tests/data/groundtruth/docling_v2/2203.01017v2.json b/tests/data/groundtruth/docling_v2/2203.01017v2.json index 0e63dc10..10194df1 100644 --- a/tests/data/groundtruth/docling_v2/2203.01017v2.json +++ b/tests/data/groundtruth/docling_v2/2203.01017v2.json @@ -829,16 +829,6 @@ "content_layer": "body", "name": "list", "label": "list" - }, - { - "self_ref": "#/groups/12", - "parent": { - "$ref": "#/tables/6" - }, - "children": [], - "content_layer": "body", - "name": "rich_cell_group_7_0_0", - "label": "unspecified" } ], "texts": [ @@ -25418,11 +25408,7 @@ "parent": { "$ref": "#/body" }, - "children": [ - { - "$ref": "#/groups/12" - } - ], + "children": [], "content_layer": "body", "label": "table", "prov": [ @@ -25445,43 +25431,10 @@ "references": [], "footnotes": [], "data": { - "table_cells": [ - { - "row_span": 1, - "col_span": 1, - "start_row_offset_idx": 0, - "end_row_offset_idx": 1, - "start_col_offset_idx": 0, - "end_col_offset_idx": 1, - "text": "", - "column_header": false, - "row_header": false, - "row_section": false, - "fillable": false, - "ref": { - "$ref": "#/groups/12" - } - } - ], - "num_rows": 1, - "num_cols": 1, - "grid": [ - [ - { - "row_span": 1, - "col_span": 1, - "start_row_offset_idx": 0, - "end_row_offset_idx": 1, - "start_col_offset_idx": 0, - "end_col_offset_idx": 1, - "text": "", - "column_header": false, - "row_header": false, - "row_section": false, - "fillable": false - } - ] - ] + "table_cells": [], + "num_rows": 0, + "num_cols": 0, + "grid": [] }, "annotations": [] } diff --git a/tests/data/groundtruth/docling_v2/2203.01017v2.md b/tests/data/groundtruth/docling_v2/2203.01017v2.md index f1823f6d..152a4b52 100644 --- a/tests/data/groundtruth/docling_v2/2203.01017v2.md +++ b/tests/data/groundtruth/docling_v2/2203.01017v2.md @@ -367,9 +367,6 @@ Figure 9: Example of a table with big empty distance between cells. -| | -|----| - Figure 10: Example of a complex table with empty cells.