diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 95dcfe75..9fec8838 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -34,11 +34,12 @@ class ConversionStatus(str, Enum): class InputFormat(str, Enum): """A document format supported by document backend parsers.""" + PDF = "pdf" DOCX = "docx" + XLSM = "xlsm" PPTX = "pptx" HTML = "html" IMAGE = "image" - PDF = "pdf" ASCIIDOC = "asciidoc" MD = "md" CSV = "csv" diff --git a/docling/document_converter.py b/docling/document_converter.py index 08095d43..e1446f0b 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -156,6 +156,7 @@ def _get_default_option(format: InputFormat) -> FormatOption: InputFormat.JSON_DOCLING: FormatOption( pipeline_cls=SimplePipeline, backend=DoclingJSONBackend ), + InputFormat.XLSM: InputFormat.XLSX, } if (options := format_to_default_options.get(format)) is not None: return options diff --git a/tests/input/sample_sales_macro (1).xlsm b/tests/input/sample_sales_macro (1).xlsm new file mode 100644 index 00000000..0bd6663e Binary files /dev/null and b/tests/input/sample_sales_macro (1).xlsm differ diff --git a/tests/output/sample_sales_macro (1).json b/tests/output/sample_sales_macro (1).json new file mode 100644 index 00000000..d61c438d --- /dev/null +++ b/tests/output/sample_sales_macro (1).json @@ -0,0 +1,2353 @@ +{ + "input": { + "file": "sample_sales_macro (1).xlsm", + "document_hash": "bca115316d047aa424d9afc428ad78c28e82408971a72379256e47e40c62506a", + "valid": true, + "limits": { + "max_num_pages": 9223372036854775807, + "max_file_size": 9223372036854775807, + "page_range": [ + 1, + 9223372036854775807 + ] + }, + "format": "xlsx", + "filesize": 6039, + "page_count": 1 + }, + "status": "success", + "errors": [], + "pages": [], + "assembled": { + "elements": [], + "body": [], + "headers": [] + }, + "timings": {}, + "document": { + "schema_name": "DoclingDocument", + "version": "1.3.0", + 
"name": "sample_sales_macro (1)", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "binary_hash": 2697172271627391082, + "filename": "sample_sales_macro (1).xlsm", + "uri": null + }, + "furniture": { + "self_ref": "#/furniture", + "parent": null, + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "parent": null, + "children": [ + { + "cref": "#/groups/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "cref": "#/body" + }, + "children": [ + { + "cref": "#/tables/0" + } + ], + "content_layer": "body", + "name": "sheet: SalesData", + "label": "section" + } + ], + "texts": [], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "cref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 0.0, + "t": 0.0, + "r": 4.0, + "b": 21.0, + "coord_origin": "TOPLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "image": null, + "data": { + "table_cells": [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Product", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Date", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Quantity", + "column_header": true, + 
"row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-01", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-02", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + 
"end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-03", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-04", + "column_header": false, + "row_header": false, + 
"row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-05", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, 
+ "end_col_offset_idx": 2, + "text": "2024-01-06", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-07", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 
1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-08", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-09", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + 
"column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "11000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-13", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6200", + 
"column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "10500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3200", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-17", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + 
"column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9400", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-19", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8900", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 21, + "num_cols": 4, + "grid": [ + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Product", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + 
"end_col_offset_idx": 2, + "text": "Date", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Quantity", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-01", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + 
"row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-02", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-03", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, 
+ "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-04", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-05", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + 
"col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-06", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-07", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15000", + 
"column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-08", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-09", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + 
"start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "11000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": 
"5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-13", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + 
"col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6200", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-14", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7100", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + 
"text": "2024-01-15", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "10500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3200", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 
1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-17", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9400", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 0, + "end_col_offset_idx": 
1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-19", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6100", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": null, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8900", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + 
"key_value_items": [],
+    "form_items": [],
+    "pages": {
+      "1": {
+        "size": {
+          "width": 4.0,
+          "height": 21.0
+        },
+        "image": null,
+        "page_no": 1
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/test_backend_msexcel_xlsm.py b/tests/test_backend_msexcel_xlsm.py
new file mode 100644
index 00000000..ac4ec405
--- /dev/null
+++ b/tests/test_backend_msexcel_xlsm.py
@@ -0,0 +1,120 @@
+"""Regression test and fixture generator for macro-enabled Excel (.xlsm) input.
+
+Under pytest, ``test_xlsm_conversion_matches_expected`` converts the sample
+workbook and compares it with the JSON stored in ``tests/output``. Run as a
+script, the module converts every supported file in ``tests/input`` and
+(re)writes one JSON dump per file into ``tests/output``.
+"""
+
+import json
+import os
+from pathlib import Path, PurePath
+from typing import Any, List, Set
+
+from docling.datamodel.base_models import FormatToExtensions, InputFormat
+from docling.document_converter import DocumentConverter
+
+# Resolve fixtures relative to this file so the working directory is irrelevant.
+_TESTS_DIR = Path(__file__).resolve().parent
+INPUT_DIR = _TESTS_DIR / "input"
+OUTPUT_DIR = _TESTS_DIR / "output"
+
+# Macro-enabled workbook checked by the pytest test below.
+XLSM_SAMPLE = "sample_sales_macro (1).xlsm"
+
+
+def supported_extensions() -> Set[str]:
+    """Return the file extensions that mark a file in INPUT_DIR as convertible."""
+    exts: Set[str] = set()
+    for fmt in InputFormat:
+        # Formats without an extension mapping fall back to their enum value.
+        exts.update(FormatToExtensions.get(fmt) or [fmt.value])
+    # Always accept xlsm, whether or not FormatToExtensions lists it.
+    exts.add("xlsm")
+    return exts
+
+
+def find_input_files(input_dir: Path = INPUT_DIR) -> List[Path]:
+    """Return the supported files directly inside *input_dir*, sorted by path."""
+    exts = supported_extensions()
+    return sorted(
+        f
+        for f in input_dir.iterdir()
+        if f.is_file() and f.suffix[1:].lower() in exts
+    )
+
+
+def convert_paths(obj: Any) -> Any:
+    """Recursively replace pathlib objects in *obj* with their string form.
+
+    Dicts and lists are rebuilt; any other value is returned unchanged.
+    """
+    if isinstance(obj, dict):
+        return {k: convert_paths(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [convert_paths(i) for i in obj]
+    if isinstance(obj, PurePath):
+        return str(obj)
+    return obj
+
+
+def convert_file(converter: DocumentConverter, file: Path) -> dict:
+    """Convert *file* and return the dumped result with paths as strings."""
+    return convert_paths(converter.convert(str(file)).model_dump())
+
+
+def _cell_texts(result_dict: dict) -> List[List[str]]:
+    """Return, per table, the text of every cell in a dumped result."""
+    return [
+        [cell["text"] for cell in table["data"]["table_cells"]]
+        for table in result_dict["document"]["tables"]
+    ]
+
+
+def test_xlsm_conversion_matches_expected():
+    """The sample workbook converts to the status and cells stored on disk."""
+    source = INPUT_DIR / XLSM_SAMPLE
+    with open(OUTPUT_DIR / (source.stem + ".json"), encoding="utf-8") as f:
+        expected = json.load(f)
+
+    # Round-trip through JSON so enum members such as the status compare as
+    # the plain values that were loaded from the stored file.
+    actual = json.loads(json.dumps(convert_file(DocumentConverter(), source)))
+
+    assert actual["status"] == expected["status"]
+    assert _cell_texts(actual) == _cell_texts(expected)
+
+
+def main() -> int:
+    """Convert every supported input file and write its JSON dump.
+
+    Returns 0 when all files converted, 1 if at least one failed.
+    """
+    OUTPUT_DIR.mkdir(exist_ok=True)
+    print(f"Supported extensions: {sorted(supported_extensions())}")
+
+    input_files = find_input_files()
+    names = [f.name for f in input_files]
+    print(f"Found {len(input_files)} files to process: {names}")
+
+    converter = DocumentConverter()
+    failed = False
+    for file in input_files:
+        print(f"Processing {file}...")
+        try:
+            result_dict = convert_file(converter, file)
+            out_path = OUTPUT_DIR / (file.stem + ".json")
+            with open(out_path, "w", encoding="utf-8") as f:
+                json.dump(result_dict, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            # Best-effort batch: report the failure and keep going, but
+            # remember it so the exit status reflects it.
+            failed = True
+            print(f"Failed to convert {file.name}: {e}")
+        else:
+            print(f"Converted {file.name} -> {out_path.name}")
+    return 1 if failed else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())