Merge from main

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-03-14 13:52:36 +01:00
66 changed files with 2678 additions and 760 deletions

Binary file not shown.

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -296,7 +296,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -308,7 +308,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -320,7 +320,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -332,7 +332,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -284,7 +284,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -296,7 +296,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -308,7 +308,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -284,7 +284,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -296,7 +296,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -308,7 +308,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -320,7 +320,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -308,7 +308,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -320,7 +320,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -332,7 +332,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -344,7 +344,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},

View File

@@ -0,0 +1,40 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: inline: group group
item-2 at level 2: paragraph: This is a word document and this is an inline equation:
item-3 at level 2: formula: A= \pi r^{2}
item-4 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-5 at level 1: paragraph:
item-6 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23
item-7 at level 1: paragraph: And that is an equation by itself. Cheers!
item-8 at level 1: paragraph:
item-9 at level 1: paragraph: This is another equation:
item-10 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right)
item-11 at level 1: paragraph:
item-12 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
item-13 at level 1: paragraph:
item-14 at level 1: paragraph:
item-15 at level 1: inline: group group
item-16 at level 2: paragraph: This is a word document and this is an inline equation:
item-17 at level 2: formula: A= \pi r^{2}
item-18 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-19 at level 1: paragraph:
item-20 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}
item-21 at level 1: paragraph:
item-22 at level 1: paragraph: And that is an equation by itself. Cheers!
item-23 at level 1: paragraph:
item-24 at level 1: paragraph: This is another equation:
item-25 at level 1: paragraph:
item-26 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ght)x^{2}}{2!}+ \text{ \textellipsis }
item-27 at level 1: paragraph:
item-28 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
item-29 at level 1: paragraph:
item-30 at level 1: paragraph:
item-31 at level 1: inline: group group
item-32 at level 2: paragraph: This is a word document and this is an inline equation:
item-33 at level 2: formula: A= \pi r^{2}
item-34 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-35 at level 1: paragraph:
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
item-37 at level 1: paragraph:
item-38 at level 1: paragraph: And that is an equation by itself. Cheers!
item-39 at level 1: paragraph:

View File

@@ -0,0 +1,616 @@
{
"schema_name": "DoclingDocument",
"version": "1.2.0",
"name": "equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 11121138535595486899,
"filename": "equations.docx"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/groups/0"
},
{
"$ref": "#/texts/3"
},
{
"$ref": "#/texts/4"
},
{
"$ref": "#/texts/5"
},
{
"$ref": "#/texts/6"
},
{
"$ref": "#/texts/7"
},
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
},
{
"$ref": "#/texts/10"
},
{
"$ref": "#/texts/11"
},
{
"$ref": "#/texts/12"
},
{
"$ref": "#/groups/1"
},
{
"$ref": "#/texts/16"
},
{
"$ref": "#/texts/17"
},
{
"$ref": "#/texts/18"
},
{
"$ref": "#/texts/19"
},
{
"$ref": "#/texts/20"
},
{
"$ref": "#/texts/21"
},
{
"$ref": "#/texts/22"
},
{
"$ref": "#/texts/23"
},
{
"$ref": "#/texts/24"
},
{
"$ref": "#/texts/25"
},
{
"$ref": "#/texts/26"
},
{
"$ref": "#/texts/27"
},
{
"$ref": "#/groups/2"
},
{
"$ref": "#/texts/31"
},
{
"$ref": "#/texts/32"
},
{
"$ref": "#/texts/33"
},
{
"$ref": "#/texts/34"
},
{
"$ref": "#/texts/35"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [
{
"self_ref": "#/groups/0",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/0"
},
{
"$ref": "#/texts/1"
},
{
"$ref": "#/texts/2"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/1",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/13"
},
{
"$ref": "#/texts/14"
},
{
"$ref": "#/texts/15"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/2",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/28"
},
{
"$ref": "#/texts/29"
},
{
"$ref": "#/texts/30"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
}
],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/1",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/2",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/3",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/4",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23",
"text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23"
},
{
"self_ref": "#/texts/5",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/6",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/7",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)",
"text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)"
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/10",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
},
{
"self_ref": "#/texts/11",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/12",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/13",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/14",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/15",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/16",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/17",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}",
"text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}"
},
{
"self_ref": "#/texts/18",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/19",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/20",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/21",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
},
{
"self_ref": "#/texts/22",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/23",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }",
"text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }"
},
{
"self_ref": "#/texts/24",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/25",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
},
{
"self_ref": "#/texts/26",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/27",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/28",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/29",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/30",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/31",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/32",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty",
"text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty"
},
{
"self_ref": "#/texts/33",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/34",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/35",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@@ -0,0 +1,29 @@
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
$$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$$
And that is an equation by itself. Cheers!
This is another equation:
$$f\left(x\right)=a_{0}+\sum_{n=1}^{ \infty }\left(a_{n}\cos(\frac{n \pi x}{L})+b_{n}\sin(\frac{n \pi x}{L})\right)$$
This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
$$\left(x+a\right)^{n}=\sum_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$$
And that is an equation by itself. Cheers!
This is another equation:
$$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis }$$
This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
And that is an equation by itself. Cheers!

View File

@@ -344,7 +344,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -356,7 +356,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -368,7 +368,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -493,7 +493,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -505,7 +505,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -517,7 +517,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -68,7 +68,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -80,7 +80,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -181,7 +181,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -193,7 +193,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -205,7 +205,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -68,7 +68,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -80,7 +80,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -181,7 +181,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -193,7 +193,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -205,7 +205,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -0,0 +1,22 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: list: group list
item-2 at level 2: list_item: Asia
item-3 at level 3: list: group list
item-4 at level 4: list_item: China
item-5 at level 4: list_item: Japan
item-6 at level 4: list_item: Thailand
item-7 at level 2: list_item: Europe
item-8 at level 3: list: group list
item-9 at level 4: list_item: UK
item-10 at level 4: list_item: Germany
item-11 at level 4: list_item: Switzerland
item-12 at level 5: list: group list
item-13 at level 6: list: group list
item-14 at level 7: list_item: Bern
item-15 at level 7: list_item: Aargau
item-16 at level 4: list_item: Italy
item-17 at level 5: list: group list
item-18 at level 6: list: group list
item-19 at level 7: list_item: Piedmont
item-20 at level 7: list_item: Liguria
item-21 at level 2: list_item: Africa

View File

@@ -0,0 +1,374 @@
{
"schema_name": "DoclingDocument",
"version": "1.2.0",
"name": "example_07",
"origin": {
"mimetype": "text/html",
"binary_hash": 623628706615267627,
"filename": "example_07.html"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/groups/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [
{
"self_ref": "#/groups/0",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/0"
},
{
"$ref": "#/texts/4"
},
{
"$ref": "#/texts/13"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/1",
"parent": {
"$ref": "#/texts/0"
},
"children": [
{
"$ref": "#/texts/1"
},
{
"$ref": "#/texts/2"
},
{
"$ref": "#/texts/3"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/2",
"parent": {
"$ref": "#/texts/4"
},
"children": [
{
"$ref": "#/texts/5"
},
{
"$ref": "#/texts/6"
},
{
"$ref": "#/texts/7"
},
{
"$ref": "#/texts/10"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/3",
"parent": {
"$ref": "#/texts/7"
},
"children": [
{
"$ref": "#/groups/4"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/4",
"parent": {
"$ref": "#/groups/3"
},
"children": [
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/5",
"parent": {
"$ref": "#/texts/10"
},
"children": [
{
"$ref": "#/groups/6"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/6",
"parent": {
"$ref": "#/groups/5"
},
"children": [
{
"$ref": "#/texts/11"
},
{
"$ref": "#/texts/12"
}
],
"content_layer": "body",
"name": "list",
"label": "list"
}
],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/groups/0"
},
"children": [
{
"$ref": "#/groups/1"
}
],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Asia",
"text": "Asia",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/1",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "China",
"text": "China",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/2",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Japan",
"text": "Japan",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/3",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Thailand",
"text": "Thailand",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/4",
"parent": {
"$ref": "#/groups/0"
},
"children": [
{
"$ref": "#/groups/2"
}
],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Europe",
"text": "Europe",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/5",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "UK",
"text": "UK",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/6",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Germany",
"text": "Germany",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/7",
"parent": {
"$ref": "#/groups/2"
},
"children": [
{
"$ref": "#/groups/3"
}
],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Switzerland",
"text": "Switzerland",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/groups/4"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Bern",
"text": "Bern",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/groups/4"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Aargau",
"text": "Aargau",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/10",
"parent": {
"$ref": "#/groups/2"
},
"children": [
{
"$ref": "#/groups/5"
}
],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Italy",
"text": "Italy",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/11",
"parent": {
"$ref": "#/groups/6"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Piedmont",
"text": "Piedmont",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/12",
"parent": {
"$ref": "#/groups/6"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Liguria",
"text": "Liguria",
"enumerated": false,
"marker": "-"
},
{
"self_ref": "#/texts/13",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Africa",
"text": "Africa",
"enumerated": false,
"marker": "-"
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@@ -0,0 +1,14 @@
- Asia
- China
- Japan
- Thailand
- Europe
- UK
- Germany
- Switzerland
- Bern
- Aargau
- Italy
- Piedmont
- Liguria
- Africa

View File

@@ -960,7 +960,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -972,7 +972,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1385,7 +1385,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1397,7 +1397,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1409,7 +1409,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1421,7 +1421,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1433,7 +1433,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1445,7 +1445,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -176,7 +176,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Tab1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -188,7 +188,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Tab2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -200,7 +200,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Tab3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -289,7 +289,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Tab1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -301,7 +301,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Tab2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -313,7 +313,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Tab3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -136,7 +136,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -148,7 +148,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "second ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -160,7 +160,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "third",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -393,7 +393,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -405,7 +405,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "second ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -417,7 +417,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "third",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -675,7 +675,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -687,7 +687,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -699,7 +699,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -711,7 +711,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "col-4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1112,7 +1112,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1124,7 +1124,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1136,7 +1136,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1148,7 +1148,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "col-4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -1578,7 +1578,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1590,7 +1590,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1602,7 +1602,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1763,7 +1763,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1775,7 +1775,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1787,7 +1787,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -1969,7 +1969,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1981,7 +1981,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1993,7 +1993,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2154,7 +2154,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "col-1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2166,7 +2166,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "col-2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2178,7 +2178,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "col-3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -2360,7 +2360,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2372,7 +2372,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2545,7 +2545,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2557,7 +2557,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2569,7 +2569,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -2583,7 +2583,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2827,7 +2827,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -2839,7 +2839,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -3012,7 +3012,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -3024,7 +3024,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -3036,7 +3036,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "header (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -3050,7 +3050,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "first (f)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},

View File

@@ -7914,7 +7914,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -7950,7 +7950,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8130,7 +8130,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8159,7 +8159,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8171,7 +8171,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -8237,7 +8237,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8249,7 +8249,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -8445,7 +8445,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8457,7 +8457,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -8513,7 +8513,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8578,7 +8578,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -8590,7 +8590,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -490,7 +490,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -502,7 +502,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Food",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -514,7 +514,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Calories per portion",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -639,7 +639,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -651,7 +651,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Food",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -663,7 +663,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Calories per portion",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -71,19 +71,19 @@
</head>
<h2>Test with tables</h2>
<p>A uniform table</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with horizontal spans</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with horizontal spans in inner columns</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td>Header 0.3</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th>Header 0.3</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with vertical spans</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with all kinds of spans and empty cells</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td></td><td></td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th></th><th></th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
<p></p>
<p></p>
</html>

View File

@@ -261,7 +261,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -273,7 +273,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -285,7 +285,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -374,7 +374,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -386,7 +386,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -398,7 +398,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -504,7 +504,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -516,7 +516,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -528,7 +528,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -593,7 +593,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -605,7 +605,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -617,7 +617,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -723,7 +723,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -735,7 +735,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -747,7 +747,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -759,7 +759,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Header 0.3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -848,7 +848,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -860,7 +860,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -872,7 +872,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -884,7 +884,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Header 0.3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -1014,7 +1014,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1026,7 +1026,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1038,7 +1038,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1175,7 +1175,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1187,7 +1187,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1199,7 +1199,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@@ -1381,7 +1381,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1393,7 +1393,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1405,7 +1405,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1417,7 +1417,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1429,7 +1429,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1818,7 +1818,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1830,7 +1830,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1842,7 +1842,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1854,7 +1854,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@@ -1866,7 +1866,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@@ -0,0 +1,40 @@
<html>
<body>
<ul>
<li>Asia
<ul>
<li>China</li>
<li>Japan</li>
<li>Thailand</li>
</ul>
</li>
<li>Europe
<ul>
<li>UK</li>
<li>Germany</li>
<li>Switzerland
<ul>
<li style="list-style-type: none;">
<ul>
<li>Bern</li>
<li>Aargau</li>
</ul>
</li>
</ul>
</li>
<li>Italy
<ul>
<li style="list-style-type: none;">
<ul>
<li>Piedmont</li>
<li>Liguria</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li>Africa</li>
</ul>
</body>
</html>

View File

@@ -59,7 +59,11 @@ def test_e2e_valid_csv_conversions():
pred_itxt, str(gt_path) + ".itxt"
), "export to indented-text"
assert verify_document(doc, str(gt_path) + ".json"), "export to json"
assert verify_document(
pred_doc=doc,
gtfile=str(gt_path) + ".json",
generate=GENERATE,
), "export to json"
def test_e2e_invalid_csv_conversions():

View File

@@ -91,4 +91,8 @@ def test_e2e_docx_conversions():
if docx_path.name == "word_tables.docx":
pred_html: str = doc.export_to_html()
assert verify_export(pred_html, str(gt_path) + ".html"), "export to html"
assert verify_export(
pred_text=pred_html,
gtfile=str(gt_path) + ".html",
generate=GENERATE,
), "export to html"

View File

@@ -179,7 +179,7 @@ def test_guess_format(tmp_path):
# Non-Docling JSON
# TODO: Docling JSON is currently the single supported JSON flavor and the pipeline
# will try to validate *any* JSON (based on suffix/MIME) as Docling JSON; proper
# disambiguation seen as part of https://github.com/DS4SD/docling/issues/802
# disambiguation seen as part of https://github.com/docling-project/docling/issues/802
test_str = "{}"
stream = DocumentStream(name="test.json", stream=BytesIO(f"{test_str}".encode()))
assert dci._guess_format(stream) == InputFormat.JSON_DOCLING