fix: add table raw content when no table structure model is used (#1815)

* add table raw cells when no table structure model was used

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Add RichTableCell instance for tables with missing structure.

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update test GT

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-10-02 13:46:42 +02:00
committed by GitHub
parent f0b630e24e
commit 4f295ed051
34 changed files with 6835 additions and 3389 deletions

View File

@@ -355,6 +355,13 @@ def convert( # noqa: C901
help="Replace any existing text with OCR generated text over the full content.",
),
] = False,
tables: Annotated[
bool,
typer.Option(
...,
help="If enabled, the table structure model will be used to extract table information.",
),
] = True,
ocr_engine: Annotated[
str,
typer.Option(
@@ -591,7 +598,7 @@ def convert( # noqa: C901
accelerator_options=accelerator_options,
do_ocr=ocr,
ocr_options=ocr_options,
do_table_structure=True,
do_table_structure=tables,
do_code_enrichment=enrich_code,
do_formula_enrichment=enrich_formula,
do_picture_description=enrich_picture_description,

View File

@@ -9,6 +9,7 @@ from docling_core.types.doc import (
NodeItem,
ProvenanceItem,
RefItem,
RichTableCell,
TableData,
)
from docling_core.types.doc.document import ContentLayer
@@ -103,6 +104,22 @@ class ReadingOrderModel:
else:
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
def _create_rich_cell_group(
    self, element: BasePageElement, doc: DoclingDocument, table_item: NodeItem
) -> RefItem:
    """Wrap the children of ``element`` in a new group attached to ``table_item``.

    The group is registered on ``doc`` with an unspecified label and a name
    built from the number of tables in ``doc`` at call time, followed by a
    fixed ``_0_0`` suffix (presumably the row/column of the single cell --
    confirm against the caller).

    Args:
        element: Page element whose child elements are moved into the group.
        doc: Document that receives the new group.
        table_item: Node that becomes the parent of the new group.

    Returns:
        A reference to the newly created group, suitable for use as the
        ``ref`` of a rich table cell.
    """
    cell_group = doc.add_group(
        label=GroupLabel.UNSPECIFIED,
        name=f"rich_cell_group_{len(doc.tables)}_0_0",
        parent=table_item,
    )
    # Populate the group with every child of the table element.
    self._add_child_elements(element, cell_group, doc)
    return cell_group.get_ref()
def _readingorder_elements_to_docling_doc(
self,
conv_res: ConversionResult,
@@ -197,11 +214,16 @@ class ReadingOrderModel:
)
elif isinstance(element, Table):
tbl_data = TableData(
num_rows=element.num_rows,
num_cols=element.num_cols,
table_cells=element.table_cells,
)
# Check if table has no structure prediction
if element.num_rows == 0 and element.num_cols == 0:
# Create minimal 1x1 table with rich cell containing all children
tbl_data = TableData(num_rows=1, num_cols=1, table_cells=[])
else:
tbl_data = TableData(
num_rows=element.num_rows,
num_cols=element.num_cols,
table_cells=element.table_cells,
)
prov = ProvenanceItem(
page_no=element.page_no + 1,
@@ -231,6 +253,26 @@ class ReadingOrderModel:
tbl.footnotes.append(new_footnote_item.get_ref())
# Handle case where table has no structure prediction
if element.num_rows == 0 and element.num_cols == 0:
# Create rich cell containing all child elements
rich_cell_ref = self._create_rich_cell_group(element, out_doc, tbl)
# Create rich table cell spanning the entire 1x1 table
rich_cell = RichTableCell(
text="", # Empty text since content is in the group
row_span=1,
col_span=1,
start_row_offset_idx=0,
end_row_offset_idx=1,
start_col_offset_idx=0,
end_col_offset_idx=1,
column_header=False,
row_header=False,
ref=rich_cell_ref,
)
out_doc.add_table_cell(table_item=tbl, cell=rich_cell)
# TODO: Consider adding children of Table.
elif isinstance(element, FigureElement):

View File

@@ -218,7 +218,7 @@
<picture><loc_67><loc_92><loc_205><loc_308></picture>
<caption><loc_52><loc_317><loc_223><loc_323>Figure 8: Example of a table with multi-line header.</caption>
<picture><loc_252><loc_59><loc_455><loc_185><caption><loc_252><loc_194><loc_445><loc_207>Figure 9: Example of a table with big empty distance between cells.</caption></picture>
<otsl><loc_274><loc_286><loc_400><loc_317></otsl>
<otsl><loc_274><loc_286><loc_400><loc_317><ecel><nl></otsl>
<picture><loc_273><loc_239><loc_424><loc_420><caption><loc_255><loc_430><loc_443><loc_435>Figure 10: Example of a complex table with empty cells.</caption></picture>
<page_footer><loc_239><loc_464><loc_247><loc_469>13</page_footer>
<page_break>

View File

@@ -829,6 +829,16 @@
"content_layer": "body",
"name": "list",
"label": "list"
},
{
"self_ref": "#/groups/12",
"parent": {
"$ref": "#/tables/6"
},
"children": [],
"content_layer": "body",
"name": "rich_cell_group_7_0_0",
"label": "unspecified"
}
],
"texts": [
@@ -25408,7 +25418,11 @@
"parent": {
"$ref": "#/body"
},
"children": [],
"children": [
{
"$ref": "#/groups/12"
}
],
"content_layer": "body",
"label": "table",
"prov": [
@@ -25431,10 +25445,43 @@
"references": [],
"footnotes": [],
"data": {
"table_cells": [],
"num_rows": 0,
"num_cols": 0,
"grid": []
"table_cells": [
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"row_header": false,
"row_section": false,
"fillable": false,
"ref": {
"$ref": "#/groups/12"
}
}
],
"num_rows": 1,
"num_cols": 1,
"grid": [
[
{
"row_span": 1,
"col_span": 1,
"start_row_offset_idx": 0,
"end_row_offset_idx": 1,
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "",
"column_header": false,
"row_header": false,
"row_section": false,
"fillable": false
}
]
]
},
"annotations": []
}

View File

@@ -367,6 +367,9 @@ Figure 9: Example of a table with big empty distance between cells.
<!-- image -->
| |
|----|
Figure 10: Example of a complex table with empty cells.
<!-- image -->

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "csv-comma-in-cell",
"origin": {
"mimetype": "text/csv",
@@ -53,7 +53,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -65,7 +66,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -77,7 +79,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -89,7 +92,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -101,7 +105,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -113,7 +118,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -125,7 +131,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -137,7 +144,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -149,7 +157,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -161,7 +170,8 @@
"text": ",",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -173,7 +183,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -185,7 +196,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -197,7 +209,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -209,7 +222,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -221,7 +235,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -233,7 +248,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -245,7 +261,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -257,7 +274,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -269,7 +287,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -281,7 +300,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 5,
@@ -298,7 +318,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -310,7 +331,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -322,7 +344,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -334,7 +357,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -348,7 +372,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -360,7 +385,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -372,7 +398,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -384,7 +411,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -398,7 +426,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -410,7 +439,8 @@
"text": ",",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -422,7 +452,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -434,7 +465,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -448,7 +480,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -460,7 +493,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -472,7 +506,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -484,7 +519,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -498,7 +534,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -510,7 +547,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -522,7 +560,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -534,7 +573,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "csv-inconsistent-header",
"origin": {
"mimetype": "text/csv",
@@ -53,7 +53,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -65,7 +66,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -77,7 +79,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -89,7 +92,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -101,7 +105,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -113,7 +118,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -125,7 +131,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -137,7 +144,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -149,7 +157,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -161,7 +170,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -173,7 +183,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -185,7 +196,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -197,7 +209,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -209,7 +222,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -221,7 +235,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -233,7 +248,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -245,7 +261,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -257,7 +274,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -269,7 +287,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 5,
@@ -286,7 +305,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -298,7 +318,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -310,7 +331,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -322,7 +344,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -336,7 +359,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -348,7 +372,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -360,7 +385,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -372,7 +398,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -386,7 +413,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -398,7 +426,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -410,7 +439,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -422,7 +452,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -436,7 +467,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -448,7 +480,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -460,7 +493,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -472,7 +506,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -486,7 +521,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -498,7 +534,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -510,7 +547,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -522,7 +560,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "csv-too-few-columns",
"origin": {
"mimetype": "text/csv",
@@ -53,7 +53,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -65,7 +66,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -77,7 +79,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -89,7 +92,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -101,7 +105,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -113,7 +118,8 @@
"text": "'b'",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -125,7 +131,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -137,7 +144,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -149,7 +157,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -161,7 +170,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -173,7 +183,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -185,7 +196,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -197,7 +209,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -209,7 +222,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -221,7 +235,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -233,7 +248,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -245,7 +261,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -257,7 +274,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -269,7 +287,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 5,
@@ -286,7 +305,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -298,7 +318,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -310,7 +331,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -322,7 +344,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -336,7 +359,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -348,7 +372,8 @@
"text": "'b'",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -360,7 +385,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -372,7 +398,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -386,7 +413,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -398,7 +426,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -410,7 +439,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -422,7 +452,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -436,7 +467,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -448,7 +480,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -460,7 +493,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -472,7 +506,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -486,7 +521,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -498,7 +534,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -510,7 +547,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -522,7 +560,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "csv-too-many-columns",
"origin": {
"mimetype": "text/csv",
@@ -53,7 +53,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -65,7 +66,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -77,7 +79,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -89,7 +92,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -101,7 +105,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -113,7 +118,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -125,7 +131,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -137,7 +144,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -149,7 +157,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -161,7 +170,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -173,7 +183,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -185,7 +196,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -197,7 +209,8 @@
"text": "e",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -209,7 +222,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -221,7 +235,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -233,7 +248,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -245,7 +261,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -257,7 +274,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -269,7 +287,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -281,7 +300,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -293,7 +313,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 5,
@@ -310,7 +331,8 @@
"text": "1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -322,7 +344,8 @@
"text": "2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -334,7 +357,8 @@
"text": "3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -346,7 +370,8 @@
"text": "4",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -358,7 +383,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -372,7 +398,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -384,7 +411,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -396,7 +424,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -408,7 +437,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -420,7 +450,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -434,7 +465,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -446,7 +478,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -458,7 +491,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -470,7 +504,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -482,7 +517,8 @@
"text": "e",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -496,7 +532,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -508,7 +545,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -520,7 +558,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -532,7 +571,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -544,7 +584,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -558,7 +599,8 @@
"text": "a",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -570,7 +612,8 @@
"text": "b",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -582,7 +625,8 @@
"text": "c",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -594,7 +638,8 @@
"text": "d",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -606,7 +651,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "ipa20200022300.xml",
"origin": {
"mimetype": "application/xml",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "pa20010031492.xml",
"origin": {
"mimetype": "application/xml",
@@ -1616,7 +1616,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1628,7 +1629,8 @@
"text": "SENSITIVITY-EC50 VALUES",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1640,7 +1642,8 @@
"text": "SENSITIVITY-EC50 VALUES",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1652,7 +1655,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1664,7 +1668,8 @@
"text": "GROWTH STAGE OF",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1676,7 +1681,8 @@
"text": "ZEOCIN",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1688,7 +1694,8 @@
"text": "CONTROL",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1700,7 +1707,8 @@
"text": "CONTROL",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1712,7 +1720,8 @@
"text": "ASSAY REAGENT",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1724,7 +1733,8 @@
"text": "TREATED",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1736,7 +1746,8 @@
"text": "CELLS",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1748,7 +1759,8 @@
"text": "CELLS",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1760,7 +1772,8 @@
"text": "Lag Phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1772,7 +1785,8 @@
"text": "1.445 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1784,7 +1798,8 @@
"text": "1.580 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1796,7 +1811,8 @@
"text": "1.580 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1808,7 +1824,8 @@
"text": "Expotential phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1820,7 +1837,8 @@
"text": "0.446 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1832,7 +1850,8 @@
"text": "0.446 ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1844,7 +1863,8 @@
"text": "0.446 ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1856,7 +1876,8 @@
"text": "Stationary phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1868,7 +1889,8 @@
"text": "0.426 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1880,7 +1902,8 @@
"text": "0.457 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1892,7 +1915,8 @@
"text": "0.457 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 6,
@@ -1909,7 +1933,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1921,7 +1946,8 @@
"text": "SENSITIVITY-EC50 VALUES",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1933,7 +1959,8 @@
"text": "SENSITIVITY-EC50 VALUES",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1947,7 +1974,8 @@
"text": "GROWTH STAGE OF",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1959,7 +1987,8 @@
"text": "ZEOCIN",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1971,7 +2000,8 @@
"text": "CONTROL",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1985,7 +2015,8 @@
"text": "ASSAY REAGENT",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1997,7 +2028,8 @@
"text": "TREATED",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2009,7 +2041,8 @@
"text": "CELLS",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2023,7 +2056,8 @@
"text": "Lag Phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2035,7 +2069,8 @@
"text": "1.445 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2047,7 +2082,8 @@
"text": "1.580 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2061,7 +2097,8 @@
"text": "Expotential phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2073,7 +2110,8 @@
"text": "0.446 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2085,7 +2123,8 @@
"text": "0.446 ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2099,7 +2138,8 @@
"text": "Stationary phase",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2111,7 +2151,8 @@
"text": "0.426 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2123,7 +2164,8 @@
"text": "0.457 ppm ZnSO4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "pftaps057006474.txt",
"origin": {
"mimetype": "text/plain",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "pg06442728.xml",
"origin": {
"mimetype": "application/xml",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "powerpoint_bad_text",
"origin": {
"mimetype": "application/vnd.ms-powerpoint",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "powerpoint_sample",
"origin": {
"mimetype": "application/vnd.ms-powerpoint",
@@ -1022,7 +1022,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1034,7 +1035,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1046,7 +1048,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1058,7 +1061,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1070,7 +1074,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1082,7 +1087,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1094,7 +1100,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1106,7 +1113,8 @@
"text": "R1",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1118,7 +1126,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1130,7 +1139,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1142,7 +1152,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1154,7 +1165,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1166,7 +1178,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1178,7 +1191,8 @@
"text": "R2",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1190,7 +1204,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1202,7 +1217,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 2,
@@ -1214,7 +1230,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1226,7 +1243,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1238,7 +1256,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1250,7 +1269,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1262,7 +1282,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 3,
@@ -1274,7 +1295,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1286,7 +1308,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1298,7 +1321,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1310,7 +1334,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1322,7 +1347,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1334,7 +1360,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1346,7 +1373,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1358,7 +1386,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1370,7 +1399,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1382,7 +1412,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1394,7 +1425,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1406,7 +1438,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1418,7 +1451,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 9,
@@ -1435,7 +1469,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1447,7 +1482,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1459,7 +1495,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1471,7 +1508,8 @@
"text": "Class1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1483,7 +1521,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1495,7 +1534,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1507,7 +1547,8 @@
"text": "Class2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1521,7 +1562,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1533,7 +1575,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1545,7 +1588,8 @@
"text": "A merged with B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1557,7 +1601,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1569,7 +1614,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1581,7 +1627,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1593,7 +1640,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1607,7 +1655,8 @@
"text": "R1",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1619,7 +1668,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1631,7 +1681,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1643,7 +1694,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1655,7 +1707,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1667,7 +1720,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1679,7 +1733,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1693,7 +1748,8 @@
"text": "R2",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1705,7 +1761,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1717,7 +1774,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1729,7 +1787,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1741,7 +1800,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1753,7 +1813,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1765,7 +1826,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1779,7 +1841,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1791,7 +1854,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1803,7 +1867,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1815,7 +1880,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1827,7 +1893,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1839,7 +1906,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1851,7 +1919,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1865,7 +1934,8 @@
"text": "R3",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1877,7 +1947,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1889,7 +1960,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1901,7 +1973,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1913,7 +1986,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1925,7 +1999,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1937,7 +2012,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -1951,7 +2027,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1963,7 +2040,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1975,7 +2053,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1987,7 +2066,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -1999,7 +2079,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2011,7 +2092,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2023,7 +2105,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2037,7 +2120,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2049,7 +2133,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2061,7 +2146,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2073,7 +2159,8 @@
"text": "",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2085,7 +2172,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2097,7 +2185,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2109,7 +2198,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -2123,7 +2213,8 @@
"text": "R4",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2135,7 +2226,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2147,7 +2239,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2159,7 +2252,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2171,7 +2265,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2183,7 +2278,8 @@
"text": "True",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -2195,7 +2291,8 @@
"text": "False",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "powerpoint_with_image",
"origin": {
"mimetype": "application/vnd.ms-powerpoint",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "webvtt_example_01",
"origin": {
"mimetype": "text/vtt",

View File

@@ -1,10 +1,10 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "webvtt_example_02",
"origin": {
"mimetype": "text/vtt",
"binary_hash": 12867774546881601731,
"binary_hash": 5029965721282070624,
"filename": "webvtt_example_02.vtt"
},
"furniture": {

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "webvtt_example_03",
"origin": {
"mimetype": "text/vtt",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "webp-test",
"origin": {
"mimetype": "application/pdf",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "ocr_test",
"origin": {
"mimetype": "application/pdf",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "ocr_test_rotated_180",
"origin": {
"mimetype": "application/pdf",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "ocr_test_rotated_270",
"origin": {
"mimetype": "application/pdf",

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "ocr_test_rotated_90",
"origin": {
"mimetype": "application/pdf",