mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: Rich tables support for HTML backend (#2324)
* Rich tables support for HTML backend Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Decoupling JATS backend from HTML backend, ways of creating tables changed significantly Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * updated and added tests Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Refactored parse_table_data in html_backend into few smaller functions Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Changing scope of few functions in html_backend.py, making them static, when possible Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Fix for HTML tables that have tbody and/or thead, now these tables are also properly supported Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> --------- Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.6.0",
|
||||
"version": "1.7.0",
|
||||
"name": "example_03",
|
||||
"origin": {
|
||||
"mimetype": "text/html",
|
||||
@@ -346,7 +346,8 @@
|
||||
"text": "Header 1",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -358,7 +359,8 @@
|
||||
"text": "Header 2",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -370,7 +372,8 @@
|
||||
"text": "Header 3",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -382,7 +385,8 @@
|
||||
"text": "Row 1, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -394,7 +398,8 @@
|
||||
"text": "Row 1, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -406,7 +411,8 @@
|
||||
"text": "Row 1, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -418,7 +424,8 @@
|
||||
"text": "Row 2, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -430,7 +437,8 @@
|
||||
"text": "Row 2, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -442,7 +450,8 @@
|
||||
"text": "Row 2, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -454,7 +463,8 @@
|
||||
"text": "Row 3, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -466,7 +476,8 @@
|
||||
"text": "Row 3, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -478,7 +489,8 @@
|
||||
"text": "Row 3, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
}
|
||||
],
|
||||
"num_rows": 4,
|
||||
@@ -495,7 +507,8 @@
|
||||
"text": "Header 1",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -507,7 +520,8 @@
|
||||
"text": "Header 2",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -519,7 +533,8 @@
|
||||
"text": "Header 3",
|
||||
"column_header": true,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
}
|
||||
],
|
||||
[
|
||||
@@ -533,7 +548,8 @@
|
||||
"text": "Row 1, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -545,7 +561,8 @@
|
||||
"text": "Row 1, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -557,7 +574,8 @@
|
||||
"text": "Row 1, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
}
|
||||
],
|
||||
[
|
||||
@@ -571,7 +589,8 @@
|
||||
"text": "Row 2, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -583,7 +602,8 @@
|
||||
"text": "Row 2, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -595,7 +615,8 @@
|
||||
"text": "Row 2, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
}
|
||||
],
|
||||
[
|
||||
@@ -609,7 +630,8 @@
|
||||
"text": "Row 3, Col 1",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -621,7 +643,8 @@
|
||||
"text": "Row 3, Col 2",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
},
|
||||
{
|
||||
"row_span": 1,
|
||||
@@ -633,7 +656,8 @@
|
||||
"text": "Row 3, Col 3",
|
||||
"column_header": false,
|
||||
"row_header": false,
|
||||
"row_section": false
|
||||
"row_section": false,
|
||||
"fillable": false
|
||||
}
|
||||
]
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user