From f837105a09073ff1fe0ccddd37eaa6c8abd69552 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Tue, 19 Nov 2024 11:21:41 +0100 Subject: [PATCH] added tests for merged cells in excel Signed-off-by: Peter Staar --- docling/backend/msexcel_backend.py | 119 ++- .../groundtruth/docling_v2/test-01.xlsx.itxt | 5 +- .../groundtruth/docling_v2/test-01.xlsx.json | 953 +++++++++++++++++- .../groundtruth/docling_v2/test-01.xlsx.md | 20 +- tests/data/xlsx/test-01.xlsx | Bin 20214 -> 21387 bytes ...est_msexcel.py => test_backend_msexcel.py} | 0 6 files changed, 1057 insertions(+), 40 deletions(-) rename tests/{test_msexcel.py => test_backend_msexcel.py} (100%) diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py index 936c2ec4..31f0d871 100644 --- a/docling/backend/msexcel_backend.py +++ b/docling/backend/msexcel_backend.py @@ -177,7 +177,8 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend): """ Find all compact rectangular data tables in a sheet. """ - + #_log.info("find_data_tables") + tables = [] # List to store found tables visited: set[Tuple[int, int]] = set() # Track already visited cells @@ -198,7 +199,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend): tables.append(table_bounds) return tables - + def _find_table_bounds( self, sheet: Worksheet, @@ -214,56 +215,47 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend): """ _log.info("find_table_bounds") - max_row = start_row - max_col = start_col - - # Expand downward to find the table's bottom boundary - while ( - max_row < sheet.max_row - 1 - and sheet.cell(row=max_row + 2, column=start_col + 1).value is not None - ): - max_row += 1 - - # Expand rightward to find the table's right boundary - while ( - max_col < sheet.max_column - 1 - and sheet.cell(row=start_row + 1, column=max_col + 2).value is not None - ): - max_col += 1 - + max_row = self._find_table_bottom(sheet, start_row, start_col) + max_col = self._find_table_right(sheet, start_row, start_col) + # Collect the data within the bounds data = [] visited_cells = set() for ri in range(start_row, max_row + 1): - for rj in range(start_col, max_col + 1): - + cell = sheet.cell(row=ri + 1, column=rj + 1) # 1-based indexing # Check if the cell belongs to a merged range row_span = 1 col_span = 1 + + #_log.info(sheet.merged_cells.ranges) for merged_range in sheet.merged_cells.ranges: - if (ri + 1, rj + 1) in merged_range: - # Calculate the spans + + if merged_range.min_row<=ri+1 and ri+1<=merged_range.max_row and \ + merged_range.min_col<=rj+1 and rj+1<=merged_range.max_col: + row_span = merged_range.max_row - merged_range.min_row + 1 col_span = merged_range.max_col - merged_range.min_col + 1 break - data.append( - ExcelCell( - row=ri - start_row, - col=rj - start_col, - text=str(cell.value), - row_span=row_span, - col_span=col_span, - ) - ) - - # Mark all cells in the span as visited - for span_row in range(ri, ri + row_span): - for span_col in range(rj, rj + col_span): - visited_cells.add((span_row, span_col)) + if (ri, rj) not in visited_cells: + data.append( + ExcelCell( + row = ri - start_row, + col = rj - start_col, + text=str(cell.value), + row_span=row_span, + col_span=col_span, + ) + ) + # _log.info(f"cell: {ri}, {rj} -> {ri - start_row}, {rj - start_col}, {row_span}, {col_span}: {str(cell.value)}") + + # Mark all cells in the span as visited + for span_row in range(ri, ri + row_span): + for span_col in range(rj, rj + col_span): + visited_cells.add((span_row, span_col)) return ( ExcelTable( @@ -274,6 +266,59 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend): visited_cells, ) + def _find_table_bottom(self, sheet: Worksheet, start_row:int, start_col:int): + """Function to find the bottom boundary of the table""" + + max_row = start_row + + while max_row < sheet.max_row - 1: + # Get the cell value or check if it is part of a merged cell + cell = sheet.cell(row=max_row + 2, column=start_col + 1) + + # Check if the cell is part of a merged range + merged_range = next( + (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr), + None, + ) + + if cell.value is None and not merged_range: + break # Stop if the cell is empty and not merged + + # Expand max_row to include the merged range if applicable + if merged_range: + max_row = max(max_row, merged_range.max_row-1) + else: + max_row += 1 + + return max_row + + def _find_table_right(self, sheet: Worksheet, start_row:int, start_col:int): + """Function to find the right boundary of the table""" + + max_col = start_col + + while max_col < sheet.max_column - 1: + # Get the cell value or check if it is part of a merged cell + cell = sheet.cell(row=start_row + 1, column=max_col + 2) + + # Check if the cell is part of a merged range + merged_range = next( + (mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr), + None, + ) + + if cell.value is None and not merged_range: + break # Stop if the cell is empty and not merged + + # Expand max_col to include the merged range if applicable + if merged_range: + max_col = max(max_col, merged_range.max_col-1) + else: + max_col += 1 + + return max_col + + def _find_images_in_sheet( self, doc: DoclingDocument, sheet: Worksheet ) -> DoclingDocument: diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt index 72db0426..cab5f63b 100644 --- a/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.itxt @@ -4,4 +4,7 @@ item-0 at level 0: unspecified: group _root_ item-3 at level 1: section: group sheet: Sheet2 item-4 at level 2: table with [9x4] item-5 at level 2: table with [5x3] - item-6 at level 2: table with [5x3] \ No newline at end of file + item-6 at level 2: table with [5x3] + item-7 at level 1: section: group sheet: Sheet3 + item-8 at level 2: table with [7x3] + item-9 at level 2: table with [7x3] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.json b/tests/data/groundtruth/docling_v2/test-01.xlsx.json index 941525bc..9a9e0d52 100644 --- a/tests/data/groundtruth/docling_v2/test-01.xlsx.json +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.json @@ -4,7 +4,7 @@ "name": "test-01", "origin": { "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "binary_hash": 6822153538473622425, + "binary_hash": 9744611217659152490, "filename": "test-01.xlsx" }, "furniture": { @@ -21,6 +21,9 @@ }, { "$ref": "#/groups/1" + }, + { + "$ref": "#/groups/2" } ], "name": "_root_", @@ -58,6 +61,22 @@ ], "name": "sheet: Sheet2", "label": "section" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/4" + }, + { + "$ref": "#/tables/5" + } + ], + "name": "sheet: Sheet3", + "label": "section" } ], "texts": [], @@ -2282,6 +2301,938 @@ ] ] } + }, + { + "self_ref": "#/tables/4", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/5", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 7, + "num_cols": 3, + "grid": [ + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "header (f)", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "first (f)", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "second ", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "third", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 2, + "start_row_offset_idx": 5, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } } ], "key_value_items": [], diff --git a/tests/data/groundtruth/docling_v2/test-01.xlsx.md b/tests/data/groundtruth/docling_v2/test-01.xlsx.md index 17afcfd4..4a059c60 100644 --- a/tests/data/groundtruth/docling_v2/test-01.xlsx.md +++ b/tests/data/groundtruth/docling_v2/test-01.xlsx.md @@ -30,4 +30,22 @@ | 1 | 2 | 3 | | 2 | 4 | 6 | | 3 | 6 | 9 | -| 4 | 8 | 12 | \ No newline at end of file +| 4 | 8 | 12 | + +| first | header | header | +|----------|----------|----------| +| first | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | +| 10 | 9 | 9 | + +| first (f) | header (f) | header (f) | +|-------------|--------------|--------------| +| first (f) | second | third | +| 1 | 2 | 3 | +| 3 | 4 | 5 | +| 3 | 6 | 7 | +| 8 | 9 | 9 | +| 10 | 9 | 9 | \ No newline at end of file diff --git a/tests/data/xlsx/test-01.xlsx b/tests/data/xlsx/test-01.xlsx index 5a87d4f646dd7333b7d6724c304efd86f48ec060..ab75b72d56c5315d33acd8aee12113609c1120cd 100644 GIT binary patch delta 5886 zcmZX2bzD?k*Y+@gNQdA6lETm_-6<*E-90oC3LFUsL12JEKtj43q)P-zLAsGvq!DQb z-nsAReV_Y#p6~o|*4byTeST-}Yp-=(YoBFdye-CnXscslk%R7m?t?%eM$k)5%h(1C z5U8XU@)!(!O|j7W%tsDC)VLv5_Z%_o?WOEF`ixMAk^-{s#IRhqj1PH7hft3l@JJM2KJ*n*^3!_5;$r6lCws`F;yB5AK8lVRg+HN&# zs~9Y{RSLhGb7nbN=>+RGLxpM`x1Na>tFwc}w&;~mIR%7_`2(UjZztBU=dFV|b8ml3 zv3dvu3PqEv7uldq_j-sf%wO$wD z&7JDha2klE(D&(zfi|K4U@L#Vjp_Fsd|3#g%+I&5m3TEcV5ZzZ{z9~!y28wfHcOlg zOs_U4_|09zS)rh6spUp?`JAGJ8%B300Jo}N!xYB^biecQ6k>uv>bM{fF&frD7;k{5 zkDINhryEb8JFLRM!ZTl#JjBZ77IQsofhB=N{|O$($9D@l?;IDFi-$0Z1M$;@ic}3o zfNVE1X9=pxx-7Y98Q%V%mf0`G?%Fx=H=+mOZ#nn|se}m)3mVUTxS;mb z1hY3CVlEYsQ>0UL`$YlDYynw4i*N4J{Ok!30O7f{*GTK#?UJNoT|wLYQ4saMD*dGv z-{Vb-`~Lm>qJ)ETNmAY+QJ9=!w+!XSuY^LenJ!Yt@2iwg#t?U3Go(v+)sM_y!XbP9 zycQ8Nrx4Sf431~{IdL%pRmKm~9;C%^zieVy6o&<2v|-V~^5z+Sy1CfH6c*(xWd{JM zVC#iBy0uZVNPb>&B!vNtkpo*?^5V2SJtGVA>@g(A%Wm-2y~RGkIEVL$!ZiKLZ`oY$ z-J?n_@PiQdPeBE2PebGdOH$V9Q_Ai+JmzMIP6W@|C{a9Z%bU*?sVnRk`z;fxu16~( zsQn!Q5D@aEhVqWOK?y0X6jQzAME(G{Rrrb+&)Q&}W9R&>)w;is*Qu|6GTY65G29DE zq-fQI1y#|S4*HQP;lJdk?;jWOQi@0Ej~Q;^K%KWg`K>M>8mqReRv70zgU-}8u<7a4 z&t(Kr-$nLos)Cv_-7}0vwo7Z?o%#q|>6YWIZ?1B-mdlA?etn|-Z5 zdLQd&Eb*2FrR!R-r?b{Y7BxcOJ_WyzRHflLnu=l7>;EhqY3KUQVmLZ0dbgE{%(iVK z>O0AvD>uphi~OD68%ncQ;HMVn76TdzS62ih z2|YEJMER;;RZMH7Fs%mc#nNFw>ABQEX2fo9_X+NdWj~YGT*gAm&sdn-nxg-n<~kdZ zMI{!B_BWs`bWk^PDC3pxpjl|Dnl3G`#68baK&aA${}9~iIS8+UtdnZ{ktG=uy|?8g z=#Sr08XU+K2qIwHnHypuc}lEph><*Loj zK`c7!W4*#DXRMwwNQG9}L7rSl1oKUZ%vdpMt@yAPa?dr(AOXsL|6^gf=_1>OXos|9 z1j9_@L{@!?wQRV8H_!z1X!_nL7M(KZ#Xzy)^M?fm6V}!2-gNGnQ#^mt?Wm}KJqLnj z2!|v!nx5QDzp^c2kAD~LRUyov8G#tRb8Bn$P!IOrazdn38xMFwM(s(&Vs^$DIMYm- z`v+N~dl{;GHbvlrsn&ImdhFHt7KLmEwqH$4M5W2MR=Cbsh6kBaz@vEJp?Cywr%9-d z^fMYQT;2ma_Wpo{sLW;CV|)z5_szwgbc z=Bra{$g9cX>#28e|N2J1YpoQ0Vm{k0K&|!yteg$WmZ8A03#h_A!ouEHTX?bZGgfRo zeBHzL#aQzsWqEUdDX{!x)@=3kB5}u<22Q1C?)u`P&-ELdbR|4vs#>EQ6jkk*H{!C* z@a!Ou{rG-y^YZ?+Eafy)ZDQ7+V$wpz=o^RfxUCGLU1=qD$fomg^sxqyP2XvoiW;6< zrppbMga7Dd=Z#o~08iCK2#T0rXNaqPa1G?Ce^Z;BS6oi90@Q3q82f*LuQ;4^RH$D$ zJv16FNFNPEu4dHz#KN&#JmiYZVNj58W7?{f%{7`;QvWHJAk;!BNT?8|6N@@g z|J;odVNsCqh@B^V#7oJd*e!=+>rAj7+9fhaVM5+?jBSpPx*ka)4A6dmU@+79WjHJ# z9}Yj1xLc;_qzqqbq%#tzaCeSxm#+MTyQdZ_9nC)tfa?(WEqu6eeK= z7ULHgNZ^N@r?e#JMABa}jJ*w2D@s~Q%gUnM!u$F0C?Nx?1}hzQ%7azO8O~4=%r~At zqO#|AO*nn9b_-naspI6;awv+D_N#JafQkrQ=;%q~`p`%1F?`W0@w*!r|0n*3)F7>d zvDR?m?*U?Vnu0kkkKPlRE|tbjFKtW#2Wh)pN3Nm;6mNU@?9zu*EOA>x;5Iv^cx-W^ z&dw+bTRCsUX`7P&fzsqURpt+WU>j`X7xku6f5Pe>8`N|&!@`Ygg<~otUYBQJ-u7)R zYm;Ell>!U*YoXDp&`5j#%1fQwWDP1vz(oFSF&`Q$8i%`Ao+TG{(5D*`oS{Gjls?iS zz!E2n$seZ3*E2g=!XZT=9hQX}!6RgQs{%@Y!4RGhQ;s_H6U{pX{)O{x|jk zKdvwOtcdAu9S2h+jHyn|3_F>-zl8&Z;pz0rcOIHnvIo?Naw^yTktCy3vP{`jTmF2? z$s3${Fqt|Sh$&i59YEuzf(}8-f5e2Zvx9@5FYn(*;4f8?i8MoG@R7V;;XIWw4rT4z zti#1K*{X9f3Mp+}JeyBrG_K`u$jT_YJttB`gPQ6LO@Mdbj?x6DNz=u#y!5i17te2p z=z$wPzAh7?LS44T_a$F2>;Ur4W(rDLtG_n(PKtkZw^mkzz;g2Y0;aFW3Sxsy!bm%v z9+ezxxMY!H&@aO;`@+~-HI(X^F6uEeiu3|dL@Yu-owH{7k@4%lRJu_CHxK2bc1GAWTJ6ZcRMi=m^ zcXySml3e0DCtzqt>H!l!1e*L8d4l^4V^vnTofGBnHAF$3v&KDx-%#C6mVOjct10ih zzRGJlc!F9^G!Ha3MVV3L%In+&;IVztjG{j~qL?beo1@J=7nXGKHVsQnGo>j?&G`zT zvAnzc`<865=n=t3fTHNdA^6;&&pwgEyb?w^@u9Ue#{;;&p2tn?h3cNb4dR$UKEn~bY_I|t^t{H8#GVQ<@afT?43Jhz`Xi_$Yn=CctSX+1QBaMGQD~#dYN#XMkxhavX|&-=Bxbrg46zu6pnoshSz zHd0i9r>2Mc>&Ei#t5`gHAaZEUD{3ixitg*iH2Td@ETv)z^AGZZ=xz7-v+Plse-<02 zZAKK%N7Ak~iFqI2cJQ7GM`>`-{i#9{I%r|v?px;VDAZyQbG^X4ilLrrPYqDh7Np7Z zlbyGYcbH2^%!nipyxN`Z6Lz2TjnvR(WZ}^fnYNO&jP)<|1m)!D{%WkUsq@57uET_( z(=o6rAq%(iHWwRAA-9nu2GRN~kMqU%L3R&DJ^I814oV8IvY1}IGc}kw+ZCTL_O?@$(&0}T=ti|3`yxx77vn5g{O{%AWQoaT(R~o>&T z+oqst;q)=GhViN15}_z{@x$_$%)D`pt^Q80)cq_@k~)ecSbgP(ACNJ05wfePP<9P; z2ze@|ll0KFq!RyhseD&oG9|Gk`;1{n9jFP^zEY(K9hSH*k!<+3orrhTo3`mv`dWA( z%ul;z;3fIH3Pdw3vvE6xIzRhHQv(XQ4y~40QkBM1Cjwuvtq&5=<3}} z%VRa5GArWYz+}v1oT2`O{)>bS@C5sNl7L&>;A$)DCLvu36O{lx2TD35?`Ip_Xo6w7 zY7xm>%F2Ga?kiiA443OJ)2)Oi(}JKaz%1YFa@W-oJ8k;f~O23Dy2%KcG9m}dS4 zH-pL-*VYFIx6SX@Ca>_zFEso5OR8^l+K3+a6rVb+@QqKcbeo>f%cu50_6fcnA$GTl zFLzqsM3^*ZVk3Q1uukk-@RA0neHu7^*w9#_XbAHIV)H!w92h?w>q*68$+xlsMn|NnyRUu#{H#to%gU4Kiw#hZRTK!q4>AD{!-jP$o|>R3OZQJ(W;vqZPBH(ud|Jh zgT0=gkBf(s?_W$8oFJ#!!${or24h6@yWK{veNx^%bV^SwSN?BBH-^XrJHj zDS2(?`Q6u9`seLMs^&FbUkh?q0vHm^&k{$erN6KES2Hx5{`Mb=r0z^ub3QmE_oZ+L zQjHZ#o=d9?@>tike-qHv>oZTPbmwv!^p)EcW0(;f%-yxLuS=9|Zr7gX`@`@sCL8~Q zN6z6Vws<8JoF=fb>U9%m>{hsSpRs9Y(lpzp-F5%a_LIwo*(s8@RX&4DpXcV zr&L!jtl8wOG42iRuy8@h%gh48(xI^Y0LQK@3o#QK2ddzR+NXuOig}dSIe}2T2CZ>n zE4VMYaeuP4Lm|Uc)*9!;^tNqC z<;TtBUajK|avuY+9ta1ZqAY24%^2`j);s~-Bf8SAXY*jJe(i9qEiM+)?k-Clr3INs zrVgvguO(c%CdCdCxC2?w4)VEQ*jdc#NO{;@++cN8TZ-(L+2p31dCQtS7Tvm+R{pH< zo{ZR>Xy3DNoYNn0XuZeQw+>EO$*ii%rn=%rbw6j4FvQa~D9@1K;>9J@Di21EU}0or ze7|*^oP;CJNI&wY{FcShDB&l;9i#LQ<|2E|;^S9=81}fGm(=sbNJg56_o>W|GSK@- zr`}MB5d+ZhL}yL-A?6u9!SW-qL2ZJtiaZsOKQ*=(N&ZfTMRvJ$_HQZ@_hz*kb!O-L z9Ym52@pnksOEdH~>WG-;SMh7jHy?hrb z+sELUmZTH?eL~TtA|^lVc?#BR`exESWNjsTWd4`aby?yjC>Q_fWDi`_Bu$g4KyR5lXovujs&K)p6bn9 zWg!ylVmRQ2SRi!YUDMdoK#5yqWCx8y=s{U?IABpvORo4b-7a1)Ke^R65K4bQ*W z4g<`M4*37?7!=8*s*87kmcLd$4lrL{B&jCFqyIi$L#K)Vy&fQ|RUbe4ch!y_M*V+X zf7>nOo+>xZzpGZ9e<|vJTv!Q_9E8+JEj5xy|1OsZ{@aNg`9h8J$$!i8e?Fps&RhR` z;`bjS9hs=c1D2yicB?^I|IO$3Kp-^X|34Q8!@rWAUl7c}7d?cW=D$DFqKzit4=uwIwC{4QbCgnf?ktQXANE7Kr5TvLmAYG({ zCe?rx=|!Z5`tbhm-MjAp=Bzbk&0cfXoHP5|`TN%so;|#-UZBP5{}uXO=v53xw4IxK2Ic zNEB8&^?m!z^=#(E#*g{kWkcQk0U{OM6XL8@#PPLdl5ts$eK|uGY4_opP&e3uE9}eF z`JT8z*A39+Sn2c{9%BD4*{=|x@E5-K9UnXlxdI6}BJl@Q#X2B|d-pxs3Z=lh+G+_c zo%v+xan$T7UG%5Z0^7OCR}M0xq)huGy&InKB%#loMi6PW)?Wl9DqHe=0yHF;W_k>9 zCwc?zjY(2Xx07_EsBw8b%)y0(=X*W=F3%^dlN5zEf~`=7CZV?%lpps;7@C11Q4XHH zLPi-Cn?%5g=e6H5lsJ)s>3hUV=&n8F$|awzur3Ecm`As#^V(~vA0HBtnEGpstr3r4 zITE|;kWia<%Z66L-)IANhGq^(OQK9uI>QvG9OetY$A#hF&(HeLR*K^nj!6|eeZS{? z8=Z%;Nsx{*u3X|gE_UrkRd$t?|!igG} zstD--y0r0C{)F&`86m@<_krtM^X}Mi6n)JJ(G?;dflHy&aB;z z(+u&YW$MzMN@W6X`q)*u3|io8QaCe_x0N<^li}JXXp-G2%#DpO6Z_)N#jKz zz)$EAA0BP0=;eRK#6_EHHwdZhrv-GHv;r@w&xzaaQ}efs4jR$p3LHDkv%!*+$x1^8 z;yep_*$GO%uq6h%+VHX`AbW^5|(`S-n+OJ}x{kkQpfTIJuNuwF7j6R6rzR&oXS(OgwvuIFBRFeO^bY~jFGcy6$_`99^AdZGD&t z%WO>py)eV)EkF8QuD}ACpCDeT-T-!?t0a}6vJm-u%NUsh->XDk=W_`mT{rogq;4+e zNa=iUXTiS~2h5kJgMIllNIV@B1R!cUQGoEfllq^|0UslBLX8J3blB7Gu9nKhhf`?lLrnwQ5~r3-9ra0)$o-;j(?3Nn}EUdlG91n8>GjwD`NET`n$(QFf3#F>kg~UcK#@dvX&d&XzJL5wSm*$l<43 zr8r3AS}tbs4cYR6tsTyY12oOnwAqrxbH+!fQ8BQ|h=id}~vg`;oL+yWG~XyWF|ZFVd0c>+L^w z_uD@cy2^iki>ZFz{rgNzQ7XWXzN2P%-XX4Iy+?wez!n~wF<{xUH$FuNlYS9Ogyn|O zK~bee)<+k#)v7rm7Xkw*ea`LTl#yFeO%a2Qo+)RBU*&=XqZ;d{$i%B@qxD>EOOb)c z8P7JQs|ubEq*`9h65`4cPnt$rhLUZABbG_jrqO5&7hxl#o#i(~C}Vo%mpp zcKW`4u^`#Gx8&4j;=GApaD#HUh*E%q{-w87h~=lOS{|}0{fx@oLB0ZFIcw($H;SXR zCAoB?IfX6fN?kG%#fW@FPcb4czgm6_5`c;9r?1^>!ZCa2k@Vr)cL!$mZC@{P6a#)e zciqFs=D91(*d8+v-}T#7{j$MJIV_1*wr{smTSCAoVuN1K)V=jL&0t9UA^GlO2QnckGmCRyVXiU@pR z(jsJO-y#=s#$w+s37cQ#v=noWK`4~)Fl1mV^FC#v>+i;&5kzN~mD=s9<~!5zRok&n zs(dnrT%@+}r)^-AVOg#*-K)YG&1E=m3O=24UKmg`Km2%wbu(9THSu*w94|o9jDo>t zgwNa9n!~@>bC_O#CKqt!4US$W{A!2I)oVor>Y5!YZ7?|!WEH=9Aw)kZu}By*c1E8B z$iiAAW{Sh3W`y^UT+NrgDKc}DjA`kl5XRqnHfye_RoU1&%il0Q-I9l2`G~g4OI2ne zJ(lMcc zqE&YqF{74TqOBl2G5^$ICa|(|fRA?1N%%PTo94MnW$;|KK_z3f9ti$tuVO%$NPAO8!+_vDr_@4B^_#{h@7r8m=QW|oz%t`%?G4Lo_7bEKow@E zQyp^qyLo!0XCns)u9pVl!tzfIgPx`~D?|Llc(}KZg44{mj-psyNuJcUVK}i2Zm<5(hCxrVpX^p05_K@arl|iH~>G| z{OUI^09x+`0n#;akZqpM}VijLd(BF9$WZ-ATCRaZg8e>pA?2W_oS`L+)kk{IG zdR0iUSw=ElGhU4?H-6lVq;DbST`MUo8HlwH=n`rAiUd#+fE_xe@g+9g}Siu-;@B>5Coj6gR&EXp_@M&!%gCrQf7^@b-igezR|<2#OMAT|G>p z*z{v((0rSPuXrZyBN*n)n&22S>F8=RlA)H;e|$A@p>WV-Y!l)UQ|V&$_MK0$9m+9K z6|!)^tpSD3*GyXM=oipjbM0vimN9Ut$kHgB79g{M7|mdSiJBoq*)NOf39X-{r!n=n z!}vS-40J?mRj3Lg`ZcEWOYB10edlX6s7IJi=lq?8K8)`L;b--Khg2 z`El1mrib{eq{fN)J&Hu6`dT%TGH&vSjT25*i__*D=0!Yu%z2VY?-8;OclbXbO z8g{3ym?6>%yfkxfEX$T*S-r!#fo5~%{fk-bX{vEBz&md{a-->6WyF_X-AxB54cnIy zb(_@@2QhiLzY0yMT&7gR?`4qCjec+@(^#&_I8QUXHu4yX=y~q~$1X2X7;@WKoZ;;LtQc5X1J2s~Ctpq5mVO07Z5ouJ1xn ztuN9Vy8rlLZ%1n{d%IgcUQQ1ky#JWu;G~vV$TiBxTdhrM-InsJc|6&V%*Oc#$OPPc z$cKGgCEgzI`|WXksp<+Q>;Q1@!<&r_*0brQQ9tv*Q48Kre4J)|PI9)Ct7XV5agm}Y zJZ&wZy_7X2k95Afs`5*$*FBiA_WJaN2#wG3zNx=zCYBM65%Y)^C><*=npj@o3I83) zS2`_Kl04_z{)T@XBgIw`D6_91a}Qq^S^XWpE}K?%!~FFiPz~h8E8PmfnXlMD-AP85 zLYuNM<$~{XFfS1$YjNxBe>PmB&)3aMlJU)_UcH7*`#KnqIR)RjDFj}PL8Y1#&pnu! z8s+g5nM&H<@Jx{=P9@UX>J63u=+z`sCUgW@v#X#`PodMu4HPYyW?Z)a7{6t-pfHRv zu6qp0{`^WKFbM0d7U*W}=0Y^8<6*?*r1e2?pe0D{n%@a1_5+`c58S`bxHq~OZ`*cA zHbvAw9ThZe)vl`I6o{XA$2ZVXBf|lH%`>ojTDm_*E@dTzcPj z$@FxUP5+zL5B{|6Z7c8g%&A5m#&E!R$g3Tj+cg5kvlW!wo|p6vxw2fQ#S)~mjl8sq z7aFtA%9|4=7)NB=nq%&0It6H{CYYKi>ft=E0vwgSC3SBMbQ!F^`;(B`&YLe>{pr%JT&Bxo?C0)|D|$FB z#PKcd&*u2Z0W+ekCN$FgvLYe2MoWCOx>)X`Jd5kItf|{`-(LmBr5VVXdQF#Qhm+n| zxn!xLRr=37>t+$X5s?`3F0R#K>)y%6`;aaD| zU8ZF;FP`Z>_LO<%z4fX7`6DSNwCBThBF!YeEZ$Ag8`=in7>i>6pf%Vcu%Pn7^ z?s?g}dLzXC)P_3$2LE@Sd=VP{x4^JxdOWOu8}W-}SpIM7uXY40uP@5>w<{$Bftda^ zPX)um4d}5i^y%6D@j(9#;)MSNn$f4n${DagkaSoRgDY%*t1Kx91pC*b0&Ljl2ApjF z9nJp1oN!==43r_qoY=d3ELbr^Aqb2Y`;eaj>v7R76~<;6DnkOqvD1bskdkXyZX;#L zD_N}VMdPJ}Ei_VrBFY6L94<@% z%|EZb=(Ve1`EK#5|4o^T4+j4i0I3NA*}2;qdbxXeBdk3<{)AjZ9g>SZf