feat(xlsx): create a page for each worksheet in XLSX backend (#1332)

* style(xlsx): enforce type hints in XLSX backend

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

* feat(xlsx): create a page for each worksheet in XLSX backend

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

* docs(xlsx): add docstrings to XLSX backend module.

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

* docling(xlsx): add bounding boxes and page size information in cell units

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

---------

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-04-11 10:29:53 +02:00
committed by GitHub
parent c605edd8e9
commit eef2bdea77
3 changed files with 452 additions and 101 deletions

View File

@@ -97,7 +97,22 @@
"children": [],
"content_layer": "body",
"label": "picture",
"prov": [],
"prov": [
{
"page_no": 3,
"bbox": {
"l": 8.0,
"t": 18.0,
"r": 13.0,
"b": 36.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -122,7 +137,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 3.0,
"b": 7.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -661,7 +691,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 4.0,
"b": 9.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -1564,7 +1609,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 6.0,
"t": 4.0,
"r": 9.0,
"b": 9.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -1955,7 +2015,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 2.0,
"t": 13.0,
"r": 5.0,
"b": 18.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -2346,7 +2421,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 3,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 3.0,
"b": 7.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -2813,7 +2903,22 @@
"children": [],
"content_layer": "body",
"label": "table",
"prov": [],
"prov": [
{
"page_no": 3,
"bbox": {
"l": 4.0,
"t": 6.0,
"r": 7.0,
"b": 13.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -3275,5 +3380,27 @@
],
"key_value_items": [],
"form_items": [],
"pages": {}
"pages": {
"1": {
"size": {
"width": 3.0,
"height": 7.0
},
"page_no": 1
},
"2": {
"size": {
"width": 9.0,
"height": 18.0
},
"page_no": 2
},
"3": {
"size": {
"width": 13.0,
"height": 36.0
},
"page_no": 3
}
}
}