mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
BIN
tests/data/docx/equations.docx
Normal file
BIN
tests/data/docx/equations.docx
Normal file
Binary file not shown.
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -296,7 +296,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -308,7 +308,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -320,7 +320,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -332,7 +332,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -99,7 +99,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -111,7 +111,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -123,7 +123,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -135,7 +135,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -147,7 +147,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -159,7 +159,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -171,7 +171,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -183,7 +183,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -920,7 +920,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -932,7 +932,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -944,7 +944,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -956,7 +956,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -968,7 +968,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -980,7 +980,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -992,7 +992,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1004,7 +1004,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1016,7 +1016,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1028,7 +1028,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1040,7 +1040,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1052,7 +1052,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -284,7 +284,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -296,7 +296,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -308,7 +308,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -99,7 +99,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -111,7 +111,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -123,7 +123,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -135,7 +135,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -147,7 +147,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -159,7 +159,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -171,7 +171,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -183,7 +183,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -920,7 +920,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -932,7 +932,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -944,7 +944,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -956,7 +956,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -968,7 +968,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -980,7 +980,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -992,7 +992,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1004,7 +1004,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1016,7 +1016,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1028,7 +1028,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1040,7 +1040,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1052,7 +1052,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -99,7 +99,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -111,7 +111,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -123,7 +123,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -135,7 +135,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -147,7 +147,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -159,7 +159,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -171,7 +171,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -183,7 +183,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -920,7 +920,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -932,7 +932,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -944,7 +944,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -956,7 +956,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -968,7 +968,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -980,7 +980,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -992,7 +992,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1004,7 +1004,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1016,7 +1016,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1028,7 +1028,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1040,7 +1040,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1052,7 +1052,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -99,7 +99,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -111,7 +111,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -123,7 +123,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -135,7 +135,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -147,7 +147,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -159,7 +159,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -171,7 +171,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -183,7 +183,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -920,7 +920,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Index",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -932,7 +932,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Customer Id",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -944,7 +944,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "First Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -956,7 +956,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Last Name",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -968,7 +968,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "Company",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -980,7 +980,7 @@
|
||||
"start_col_offset_idx": 5,
|
||||
"end_col_offset_idx": 6,
|
||||
"text": "City",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -992,7 +992,7 @@
|
||||
"start_col_offset_idx": 6,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Country",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1004,7 +1004,7 @@
|
||||
"start_col_offset_idx": 7,
|
||||
"end_col_offset_idx": 8,
|
||||
"text": "Phone 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1016,7 +1016,7 @@
|
||||
"start_col_offset_idx": 8,
|
||||
"end_col_offset_idx": 9,
|
||||
"text": "Phone 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1028,7 +1028,7 @@
|
||||
"start_col_offset_idx": 9,
|
||||
"end_col_offset_idx": 10,
|
||||
"text": "Email",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1040,7 +1040,7 @@
|
||||
"start_col_offset_idx": 10,
|
||||
"end_col_offset_idx": 11,
|
||||
"text": "Subscription Date",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1052,7 +1052,7 @@
|
||||
"start_col_offset_idx": 11,
|
||||
"end_col_offset_idx": 12,
|
||||
"text": "Website",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -284,7 +284,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -296,7 +296,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -308,7 +308,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -320,7 +320,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -63,7 +63,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -75,7 +75,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -87,7 +87,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -308,7 +308,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -320,7 +320,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -332,7 +332,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -344,7 +344,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
|
||||
40
tests/data/groundtruth/docling_v2/equations.docx.itxt
Normal file
40
tests/data/groundtruth/docling_v2/equations.docx.itxt
Normal file
@@ -0,0 +1,40 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: inline: group group
|
||||
item-2 at level 2: paragraph: This is a word document and this is an inline equation:
|
||||
item-3 at level 2: formula: A= \pi r^{2}
|
||||
item-4 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
||||
item-5 at level 1: paragraph:
|
||||
item-6 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23
|
||||
item-7 at level 1: paragraph: And that is an equation by itself. Cheers!
|
||||
item-8 at level 1: paragraph:
|
||||
item-9 at level 1: paragraph: This is another equation:
|
||||
item-10 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right)
|
||||
item-11 at level 1: paragraph:
|
||||
item-12 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
|
||||
item-13 at level 1: paragraph:
|
||||
item-14 at level 1: paragraph:
|
||||
item-15 at level 1: inline: group group
|
||||
item-16 at level 2: paragraph: This is a word document and this is an inline equation:
|
||||
item-17 at level 2: formula: A= \pi r^{2}
|
||||
item-18 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
||||
item-19 at level 1: paragraph:
|
||||
item-20 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}
|
||||
item-21 at level 1: paragraph:
|
||||
item-22 at level 1: paragraph: And that is an equation by itself. Cheers!
|
||||
item-23 at level 1: paragraph:
|
||||
item-24 at level 1: paragraph: This is another equation:
|
||||
item-25 at level 1: paragraph:
|
||||
item-26 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ght)x^{2}}{2!}+ \text{ \textellipsis }
|
||||
item-27 at level 1: paragraph:
|
||||
item-28 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
|
||||
item-29 at level 1: paragraph:
|
||||
item-30 at level 1: paragraph:
|
||||
item-31 at level 1: inline: group group
|
||||
item-32 at level 2: paragraph: This is a word document and this is an inline equation:
|
||||
item-33 at level 2: formula: A= \pi r^{2}
|
||||
item-34 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
|
||||
item-35 at level 1: paragraph:
|
||||
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
||||
item-37 at level 1: paragraph:
|
||||
item-38 at level 1: paragraph: And that is an equation by itself. Cheers!
|
||||
item-39 at level 1: paragraph:
|
||||
616
tests/data/groundtruth/docling_v2/equations.docx.json
Normal file
616
tests/data/groundtruth/docling_v2/equations.docx.json
Normal file
@@ -0,0 +1,616 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.2.0",
|
||||
"name": "equations",
|
||||
"origin": {
|
||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"binary_hash": 11121138535595486899,
|
||||
"filename": "equations.docx"
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/3"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/7"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/8"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/9"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/10"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/11"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/12"
|
||||
},
|
||||
{
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/16"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/17"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/18"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/19"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/20"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/21"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/22"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/23"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/24"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/25"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/26"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/27"
|
||||
},
|
||||
{
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/31"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/32"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/33"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/34"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/35"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [
|
||||
{
|
||||
"self_ref": "#/groups/0",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/1"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/2"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "group",
|
||||
"label": "inline"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/1",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/13"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/14"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/15"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "group",
|
||||
"label": "inline"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/2",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/28"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/29"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/30"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "group",
|
||||
"label": "inline"
|
||||
}
|
||||
],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is a word document and this is an inline equation: ",
|
||||
"text": "This is a word document and this is an inline equation: "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "A= \\pi r^{2} ",
|
||||
"text": "A= \\pi r^{2} "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/2",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||
"text": ". If instead, I want an equation by line, I can do this:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/3",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23",
|
||||
"text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "And that is an equation by itself. Cheers!",
|
||||
"text": "And that is an equation by itself. Cheers!"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/7",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is another equation:",
|
||||
"text": "This is another equation:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/8",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)",
|
||||
"text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/9",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/10",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/11",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/12",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/13",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is a word document and this is an inline equation: ",
|
||||
"text": "This is a word document and this is an inline equation: "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/14",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "A= \\pi r^{2} ",
|
||||
"text": "A= \\pi r^{2} "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/15",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||
"text": ". If instead, I want an equation by line, I can do this:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/16",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/17",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}",
|
||||
"text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/18",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/19",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "And that is an equation by itself. Cheers!",
|
||||
"text": "And that is an equation by itself. Cheers!"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/20",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/21",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is another equation:",
|
||||
"text": "This is another equation:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/22",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/23",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }",
|
||||
"text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/24",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/25",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
||||
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/26",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/27",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/28",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "This is a word document and this is an inline equation: ",
|
||||
"text": "This is a word document and this is an inline equation: "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/29",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "A= \\pi r^{2} ",
|
||||
"text": "A= \\pi r^{2} "
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/30",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": ". If instead, I want an equation by line, I can do this:",
|
||||
"text": ". If instead, I want an equation by line, I can do this:"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/31",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/32",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty",
|
||||
"text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/33",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/34",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "And that is an equation by itself. Cheers!",
|
||||
"text": "And that is an equation by itself. Cheers!"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/35",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "paragraph",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {}
|
||||
}
|
||||
29
tests/data/groundtruth/docling_v2/equations.docx.md
Normal file
29
tests/data/groundtruth/docling_v2/equations.docx.md
Normal file
@@ -0,0 +1,29 @@
|
||||
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
|
||||
|
||||
$$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$$
|
||||
|
||||
And that is an equation by itself. Cheers!
|
||||
|
||||
This is another equation:
|
||||
|
||||
$$f\left(x\right)=a_{0}+\sum_{n=1}^{ \infty }\left(a_{n}\cos(\frac{n \pi x}{L})+b_{n}\sin(\frac{n \pi x}{L})\right)$$
|
||||
|
||||
This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.
|
||||
|
||||
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
|
||||
|
||||
$$\left(x+a\right)^{n}=\sum_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$$
|
||||
|
||||
And that is an equation by itself. Cheers!
|
||||
|
||||
This is another equation:
|
||||
|
||||
$$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis }$$
|
||||
|
||||
This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.
|
||||
|
||||
This is a word document and this is an inline equation: $A= \pi r^{2} $ . If instead, I want an equation by line, I can do this:
|
||||
|
||||
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
|
||||
|
||||
And that is an equation by itself. Cheers!
|
||||
@@ -344,7 +344,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -356,7 +356,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -368,7 +368,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -493,7 +493,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -505,7 +505,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -517,7 +517,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -80,7 +80,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -181,7 +181,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -193,7 +193,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -205,7 +205,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -80,7 +80,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -181,7 +181,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -193,7 +193,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -205,7 +205,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 2 & 3 (colspan)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
22
tests/data/groundtruth/docling_v2/example_07.html.itxt
Normal file
22
tests/data/groundtruth/docling_v2/example_07.html.itxt
Normal file
@@ -0,0 +1,22 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: list: group list
|
||||
item-2 at level 2: list_item: Asia
|
||||
item-3 at level 3: list: group list
|
||||
item-4 at level 4: list_item: China
|
||||
item-5 at level 4: list_item: Japan
|
||||
item-6 at level 4: list_item: Thailand
|
||||
item-7 at level 2: list_item: Europe
|
||||
item-8 at level 3: list: group list
|
||||
item-9 at level 4: list_item: UK
|
||||
item-10 at level 4: list_item: Germany
|
||||
item-11 at level 4: list_item: Switzerland
|
||||
item-12 at level 5: list: group list
|
||||
item-13 at level 6: list: group list
|
||||
item-14 at level 7: list_item: Bern
|
||||
item-15 at level 7: list_item: Aargau
|
||||
item-16 at level 4: list_item: Italy
|
||||
item-17 at level 5: list: group list
|
||||
item-18 at level 6: list: group list
|
||||
item-19 at level 7: list_item: Piedmont
|
||||
item-20 at level 7: list_item: Liguria
|
||||
item-21 at level 2: list_item: Africa
|
||||
374
tests/data/groundtruth/docling_v2/example_07.html.json
Normal file
374
tests/data/groundtruth/docling_v2/example_07.html.json
Normal file
@@ -0,0 +1,374 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.2.0",
|
||||
"name": "example_07",
|
||||
"origin": {
|
||||
"mimetype": "text/html",
|
||||
"binary_hash": 623628706615267627,
|
||||
"filename": "example_07.html"
|
||||
},
|
||||
"furniture": {
|
||||
"self_ref": "#/furniture",
|
||||
"children": [],
|
||||
"content_layer": "furniture",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"body": {
|
||||
"self_ref": "#/body",
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/0"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "_root_",
|
||||
"label": "unspecified"
|
||||
},
|
||||
"groups": [
|
||||
{
|
||||
"self_ref": "#/groups/0",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/13"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/1",
|
||||
"parent": {
|
||||
"$ref": "#/texts/0"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/1"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/2"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/3"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/2",
|
||||
"parent": {
|
||||
"$ref": "#/texts/4"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/5"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/6"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/7"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/10"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/3",
|
||||
"parent": {
|
||||
"$ref": "#/texts/7"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/4"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/4",
|
||||
"parent": {
|
||||
"$ref": "#/groups/3"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/8"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/9"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/5",
|
||||
"parent": {
|
||||
"$ref": "#/texts/10"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/6"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/groups/6",
|
||||
"parent": {
|
||||
"$ref": "#/groups/5"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/texts/11"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/12"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"name": "list",
|
||||
"label": "list"
|
||||
}
|
||||
],
|
||||
"texts": [
|
||||
{
|
||||
"self_ref": "#/texts/0",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/1"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Asia",
|
||||
"text": "Asia",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/1",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "China",
|
||||
"text": "China",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/2",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Japan",
|
||||
"text": "Japan",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/3",
|
||||
"parent": {
|
||||
"$ref": "#/groups/1"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Thailand",
|
||||
"text": "Thailand",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/4",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/2"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Europe",
|
||||
"text": "Europe",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/5",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "UK",
|
||||
"text": "UK",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/6",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Germany",
|
||||
"text": "Germany",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/7",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/3"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Switzerland",
|
||||
"text": "Switzerland",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/8",
|
||||
"parent": {
|
||||
"$ref": "#/groups/4"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Bern",
|
||||
"text": "Bern",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/9",
|
||||
"parent": {
|
||||
"$ref": "#/groups/4"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Aargau",
|
||||
"text": "Aargau",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/10",
|
||||
"parent": {
|
||||
"$ref": "#/groups/2"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"$ref": "#/groups/5"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Italy",
|
||||
"text": "Italy",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/11",
|
||||
"parent": {
|
||||
"$ref": "#/groups/6"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Piedmont",
|
||||
"text": "Piedmont",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/12",
|
||||
"parent": {
|
||||
"$ref": "#/groups/6"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Liguria",
|
||||
"text": "Liguria",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/13",
|
||||
"parent": {
|
||||
"$ref": "#/groups/0"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "list_item",
|
||||
"prov": [],
|
||||
"orig": "Africa",
|
||||
"text": "Africa",
|
||||
"enumerated": false,
|
||||
"marker": "-"
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
"tables": [],
|
||||
"key_value_items": [],
|
||||
"form_items": [],
|
||||
"pages": {}
|
||||
}
|
||||
14
tests/data/groundtruth/docling_v2/example_07.html.md
Normal file
14
tests/data/groundtruth/docling_v2/example_07.html.md
Normal file
@@ -0,0 +1,14 @@
|
||||
- Asia
|
||||
- China
|
||||
- Japan
|
||||
- Thailand
|
||||
- Europe
|
||||
- UK
|
||||
- Germany
|
||||
- Switzerland
|
||||
- Bern
|
||||
- Aargau
|
||||
- Italy
|
||||
- Piedmont
|
||||
- Liguria
|
||||
- Africa
|
||||
@@ -960,7 +960,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Class1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -972,7 +972,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Class2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1385,7 +1385,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Class1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1397,7 +1397,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Class1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1409,7 +1409,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Class1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1421,7 +1421,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Class2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1433,7 +1433,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Class2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1445,7 +1445,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 7,
|
||||
"text": "Class2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -176,7 +176,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Tab1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -188,7 +188,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Tab2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -200,7 +200,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Tab3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -289,7 +289,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Tab1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -301,7 +301,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Tab2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -313,7 +313,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Tab3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -136,7 +136,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -148,7 +148,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "second ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -160,7 +160,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "third",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -393,7 +393,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -405,7 +405,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "second ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -417,7 +417,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "third",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -675,7 +675,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -687,7 +687,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -699,7 +699,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -711,7 +711,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "col-4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1112,7 +1112,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1124,7 +1124,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1136,7 +1136,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1148,7 +1148,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "col-4",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -1578,7 +1578,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1590,7 +1590,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1602,7 +1602,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1763,7 +1763,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1775,7 +1775,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1787,7 +1787,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -1969,7 +1969,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1981,7 +1981,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1993,7 +1993,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2154,7 +2154,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "col-1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2166,7 +2166,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "col-2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2178,7 +2178,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "col-3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -2360,7 +2360,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2372,7 +2372,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2545,7 +2545,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2557,7 +2557,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2569,7 +2569,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -2583,7 +2583,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2827,7 +2827,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -2839,7 +2839,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -3012,7 +3012,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -3024,7 +3024,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -3036,7 +3036,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "header (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -3050,7 +3050,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "first (f)",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
|
||||
@@ -7914,7 +7914,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Duck\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -7950,7 +7950,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Scientific classification \n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8130,7 +8130,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Subfamilies\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8159,7 +8159,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Duck\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8171,7 +8171,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Duck\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -8237,7 +8237,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Scientific classification \n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8249,7 +8249,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Scientific classification \n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -8445,7 +8445,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Subfamilies\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8457,7 +8457,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Subfamilies\n",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -8513,7 +8513,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Authority control databases ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8578,7 +8578,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Authority control databases ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -8590,7 +8590,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Authority control databases ",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -490,7 +490,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -502,7 +502,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Food",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -514,7 +514,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Calories per portion",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -639,7 +639,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -651,7 +651,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Food",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -663,7 +663,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Calories per portion",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
@@ -71,19 +71,19 @@
|
||||
</head>
|
||||
<h2>Test with tables</h2>
|
||||
<p>A uniform table</p>
|
||||
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
|
||||
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
|
||||
<p></p>
|
||||
<p>A non-uniform table with horizontal spans</p>
|
||||
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
|
||||
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
|
||||
<p></p>
|
||||
<p>A non-uniform table with horizontal spans in inner columns</p>
|
||||
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td>Header 0.3</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
|
||||
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th>Header 0.3</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
|
||||
<p></p>
|
||||
<p>A non-uniform table with vertical spans</p>
|
||||
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
|
||||
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
|
||||
<p></p>
|
||||
<p>A non-uniform table with all kinds of spans and empty cells</p>
|
||||
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td></td><td></td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
|
||||
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th></th><th></th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
|
||||
<p></p>
|
||||
<p></p>
|
||||
</html>
|
||||
@@ -261,7 +261,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -273,7 +273,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -285,7 +285,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -374,7 +374,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -386,7 +386,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -398,7 +398,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -504,7 +504,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -516,7 +516,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -528,7 +528,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -593,7 +593,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -605,7 +605,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -617,7 +617,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -723,7 +723,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -735,7 +735,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -747,7 +747,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -759,7 +759,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Header 0.3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -848,7 +848,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -860,7 +860,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -872,7 +872,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -884,7 +884,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "Header 0.3",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -1014,7 +1014,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1026,7 +1026,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1038,7 +1038,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1175,7 +1175,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1187,7 +1187,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1199,7 +1199,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
@@ -1381,7 +1381,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1393,7 +1393,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1405,7 +1405,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1417,7 +1417,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1429,7 +1429,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1818,7 +1818,7 @@
|
||||
"start_col_offset_idx": 0,
|
||||
"end_col_offset_idx": 1,
|
||||
"text": "Header 0.0",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1830,7 +1830,7 @@
|
||||
"start_col_offset_idx": 1,
|
||||
"end_col_offset_idx": 2,
|
||||
"text": "Header 0.1",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1842,7 +1842,7 @@
|
||||
"start_col_offset_idx": 2,
|
||||
"end_col_offset_idx": 3,
|
||||
"text": "Header 0.2",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1854,7 +1854,7 @@
|
||||
"start_col_offset_idx": 3,
|
||||
"end_col_offset_idx": 4,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
},
|
||||
@@ -1866,7 +1866,7 @@
|
||||
"start_col_offset_idx": 4,
|
||||
"end_col_offset_idx": 5,
|
||||
"text": "",
|
||||
"column_header": false,
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
}
|
||||
|
||||
40
tests/data/html/example_07.html
Normal file
40
tests/data/html/example_07.html
Normal file
@@ -0,0 +1,40 @@
|
||||
<html>
|
||||
<body>
|
||||
<ul>
|
||||
<li>Asia
|
||||
<ul>
|
||||
<li>China</li>
|
||||
<li>Japan</li>
|
||||
<li>Thailand</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Europe
|
||||
<ul>
|
||||
<li>UK</li>
|
||||
<li>Germany</li>
|
||||
<li>Switzerland
|
||||
<ul>
|
||||
<li style="list-style-type: none;">
|
||||
<ul>
|
||||
<li>Bern</li>
|
||||
<li>Aargau</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Italy
|
||||
<ul>
|
||||
<li style="list-style-type: none;">
|
||||
<ul>
|
||||
<li>Piedmont</li>
|
||||
<li>Liguria</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Africa</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
@@ -59,7 +59,11 @@ def test_e2e_valid_csv_conversions():
|
||||
pred_itxt, str(gt_path) + ".itxt"
|
||||
), "export to indented-text"
|
||||
|
||||
assert verify_document(doc, str(gt_path) + ".json"), "export to json"
|
||||
assert verify_document(
|
||||
pred_doc=doc,
|
||||
gtfile=str(gt_path) + ".json",
|
||||
generate=GENERATE,
|
||||
), "export to json"
|
||||
|
||||
|
||||
def test_e2e_invalid_csv_conversions():
|
||||
|
||||
@@ -91,4 +91,8 @@ def test_e2e_docx_conversions():
|
||||
|
||||
if docx_path.name == "word_tables.docx":
|
||||
pred_html: str = doc.export_to_html()
|
||||
assert verify_export(pred_html, str(gt_path) + ".html"), "export to html"
|
||||
assert verify_export(
|
||||
pred_text=pred_html,
|
||||
gtfile=str(gt_path) + ".html",
|
||||
generate=GENERATE,
|
||||
), "export to html"
|
||||
|
||||
@@ -179,7 +179,7 @@ def test_guess_format(tmp_path):
|
||||
# Non-Docling JSON
|
||||
# TODO: Docling JSON is currently the single supported JSON flavor and the pipeline
|
||||
# will try to validate *any* JSON (based on suffix/MIME) as Docling JSON; proper
|
||||
# disambiguation seen as part of https://github.com/DS4SD/docling/issues/802
|
||||
# disambiguation seen as part of https://github.com/docling-project/docling/issues/802
|
||||
test_str = "{}"
|
||||
stream = DocumentStream(name="test.json", stream=BytesIO(f"{test_str}".encode()))
|
||||
assert dci._guess_format(stream) == InputFormat.JSON_DOCLING
|
||||
|
||||
Reference in New Issue
Block a user