diff --git a/tests/data/docx/table_with_equations.docx b/tests/data/docx/table_with_equations.docx new file mode 100644 index 00000000..151c03b2 Binary files /dev/null and b/tests/data/docx/table_with_equations.docx differ diff --git a/tests/data/groundtruth/docling_v2/table_with_equations.docx.itxt b/tests/data/groundtruth/docling_v2/table_with_equations.docx.itxt new file mode 100644 index 00000000..8b54db7c --- /dev/null +++ b/tests/data/groundtruth/docling_v2/table_with_equations.docx.itxt @@ -0,0 +1,3 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: table with [2x2] + item-2 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/table_with_equations.docx.json b/tests/data/groundtruth/docling_v2/table_with_equations.docx.json new file mode 100644 index 00000000..fc8f9780 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/table_with_equations.docx.json @@ -0,0 +1,174 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.5.0", + "name": "table_with_equations", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "binary_hash": 6528760837820727976, + "filename": "table_with_equations.docx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/tables/0" + }, + { + "$ref": "#/texts/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + } + ], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "The next cell has an equation", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "$A= \\pi r^{2}$", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "The next cell has another equation", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 2, + "num_cols": 2, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "The next cell has an equation", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "$A= \\pi r^{2}$", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "The next cell has another equation", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + }, + "annotations": [] + } + ], + "key_value_items": [], + "form_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/table_with_equations.docx.md b/tests/data/groundtruth/docling_v2/table_with_equations.docx.md new file mode 100644 index 00000000..837e6550 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/table_with_equations.docx.md @@ -0,0 +1,3 @@ +| The next cell has an equation | $A= \pi r^{2}$ | +|------------------------------------|----------------------------------------| +| The next cell has another equation | $x=\frac{-b \pm \sqrt{b^{2}-4ac}}{2a}$ | \ No newline at end of file