feat(msexcel): set ContentLayer.INVISIBLE for invisible sheet (#1876)

* feat(msexcel): ignore invisible sheet

* DCO Remediation Commit for Qiefan Jiang <jiangqiefan@bytedance.com>

I, Qiefan Jiang <jiangqiefan@bytedance.com>, hereby add my Signed-off-by to this commit: ca391f4908f44f301de54a97057f0b809f5ce66c

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

* retain invisible sheets, marked with ContentLayer.INVISIBLE

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

* update unit tests (UT)

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

* fix: use Optional for Python 3.9 compatibility

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

* DCO Remediation Commit for Qiefan Jiang <jiangqiefan@bytedance.com>

I, Qiefan Jiang <jiangqiefan@bytedance.com>, hereby add my Signed-off-by to this commit: a34371a90e

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

---------

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>
This commit is contained in:
Qiefan Jiang
2025-09-01 19:53:45 +08:00
committed by GitHub
parent be26044f14
commit a283ccff25
4 changed files with 132 additions and 4 deletions

View File

@@ -4,7 +4,7 @@
"name": "test-01",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"binary_hash": 13665052226482254103,
"binary_hash": 5648670872883457266,
"filename": "test-01.xlsx"
},
"furniture": {
@@ -25,6 +25,9 @@
},
{
"$ref": "#/groups/2"
},
{
"$ref": "#/groups/3"
}
],
"content_layer": "body",
@@ -85,6 +88,20 @@
"content_layer": "body",
"name": "sheet: Sheet3",
"label": "section"
},
{
"self_ref": "#/groups/3",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/tables/6"
}
],
"content_layer": "invisible",
"name": "sheet: Sheet4",
"label": "section"
}
],
"texts": [],
@@ -3382,6 +3399,95 @@
]
},
"annotations": []
},
{
"self_ref": "#/tables/6",
"parent": {
"$ref": "#/groups/3"
},
"children": [],
"content_layer": "invisible",
"label": "table",
"prov": [
{
"page_no": 4,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 1.0,
"b": 2.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
"data": {
"table_cells": [
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "header",
"column_header": true,
"row_header": false,
"row_section": false
},
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"row_header": false,
"row_section": false
}
],
"num_rows": 2,
"num_cols": 1,
"grid": [
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "header",
"column_header": true,
"row_header": false,
"row_section": false
}
],
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 1,
"end_row_offset_idx": 2,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"row_header": false,
"row_section": false
}
]
]
},
"annotations": []
}
],
"key_value_items": [],
@@ -3407,6 +3513,13 @@
"height": 36.0
},
"page_no": 3
},
"4": {
"size": {
"width": 0.0,
"height": 0.0
},
"page_no": 4
}
}
}

Binary file not shown.

View File

@@ -87,13 +87,14 @@ def test_pages(documents) -> None:
backend=MsExcelDocumentBackend,
)
backend = MsExcelDocumentBackend(in_doc=in_doc, path_or_stream=path)
assert backend.page_count() == 3
assert backend.page_count() == 4
# number of pages from the converted document
doc = next(item for path, item in documents if path.stem == "test-01")
assert len(doc.pages) == 3
assert len(doc.pages) == 4
# page sizes as number of cells
assert doc.pages.get(1).size.as_tuple() == (3.0, 7.0)
assert doc.pages.get(2).size.as_tuple() == (9.0, 18.0)
assert doc.pages.get(3).size.as_tuple() == (13.0, 36.0)
assert doc.pages.get(4).size.as_tuple() == (0.0, 0.0)