diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 5091b0ce..e086f8d6 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -20,6 +20,7 @@ from docling_core.types.doc.document import ContentLayer from PIL import Image, UnidentifiedImageError from pptx import Presentation from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER +from pptx.util import Mm from docling.backend.abstract_backend import ( DeclarativeDocumentBackend, @@ -416,8 +417,24 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB groupedshape, parent_slide, slide_ind, doc, slide_size ) - # Loop through each shape in the slide - for shape in slide.shapes: + # Generate sort keys for shapes based on their top (cluster 3mm) and left positions. + # Manually positioned boxes with a deviation of less than 3mm in their top position + # will be sorted on the same line. + # python-pptx can report None for a position the shape does not define; such + # positions are treated as 0 so the sort keys always stay comparable. + def gen_sort_keys(shapes, max_top_distance=Mm(3)): + top = None + for shape in sorted(shapes, key=lambda s: s.top or 0): + shape_top = shape.top or 0 + if top is None or abs(top - shape_top) > max_top_distance: + top = shape_top + yield (shape, (top, shape.left or 0)) + + # Loop through each shape on the slide, sorted by top cluster and then left + for shape, _ in sorted( + gen_sort_keys(slide.shapes), + key=lambda s: s[1], + ): handle_shapes(shape, parent_slide, slide_ind, doc, slide_size) # Handle notes slide diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.itxt b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.itxt index ba86c3ba..0b489248 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.itxt +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.itxt @@ -1,15 +1,15 @@ item-0 at level 0: unspecified: group _root_ item-1 at level 1: chapter: group slide-0 item-2 at level 2: title: Test Table Slide - item-3 at level 2: paragraph: With footnote - item-4 at level 2: table with [9x7] + 
item-3 at level 2: table with [9x7] + item-4 at level 2: paragraph: With footnote item-5 at level 1: chapter: group slide-1 item-6 at level 2: title: Second slide title - item-7 at level 2: paragraph: Let’s introduce a list - item-8 at level 2: paragraph: With foo - item-9 at level 2: paragraph: Bar - item-10 at level 2: paragraph: And baz things - item-11 at level 2: paragraph: A rectangle shape with this text inside. + item-7 at level 2: paragraph: A rectangle shape with this text inside. + item-8 at level 2: paragraph: Let’s introduce a list + item-9 at level 2: paragraph: With foo + item-10 at level 2: paragraph: Bar + item-11 at level 2: paragraph: And baz things item-12 at level 1: chapter: group slide-2 item-13 at level 2: ordered_list: group list item-14 at level 3: list_item: List item4 diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json index c379c6f6..af3365d4 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.json @@ -42,10 +42,10 @@ "$ref": "#/texts/0" }, { - "$ref": "#/texts/1" + "$ref": "#/tables/0" }, { - "$ref": "#/tables/0" + "$ref": "#/texts/1" } ], "content_layer": "body", @@ -310,6 +310,33 @@ "children": [], "content_layer": "body", "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 6180463.0, + "t": 5221995.0, + "r": 10256704.0, + "b": 1344058.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 40 + ] + } + ], + "orig": "A rectangle shape with this text inside.", + "text": "A rectangle shape with this text inside." 
+ }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", "prov": [ { "page_no": 2, @@ -330,7 +357,7 @@ "text": "Let’s introduce a list" }, { - "self_ref": "#/texts/4", + "self_ref": "#/texts/5", "parent": { "$ref": "#/groups/1" }, @@ -357,7 +384,7 @@ "text": "With foo" }, { - "self_ref": "#/texts/5", + "self_ref": "#/texts/6", "parent": { "$ref": "#/groups/1" }, @@ -384,7 +411,7 @@ "text": "Bar" }, { - "self_ref": "#/texts/6", + "self_ref": "#/texts/7", "parent": { "$ref": "#/groups/1" }, @@ -410,33 +437,6 @@ "orig": "And baz things", "text": "And baz things" }, - { - "self_ref": "#/texts/7", - "parent": { - "$ref": "#/groups/1" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [ - { - "page_no": 2, - "bbox": { - "l": 6180463.0, - "t": 5221995.0, - "r": 10256704.0, - "b": 1344058.0, - "coord_origin": "BOTTOMLEFT" - }, - "charspan": [ - 0, - 40 - ] - } - ], - "orig": "A rectangle shape with this text inside.", - "text": "A rectangle shape with this text inside." - }, { "self_ref": "#/texts/8", "parent": { diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.md b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.md index ec26faa4..78625815 100644 --- a/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.md +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample.pptx.md @@ -1,7 +1,5 @@ # Test Table Slide -With footnote - | | Class1 | Class1 | Class1 | Class2 | Class2 | Class2 | |----|-----------------|-----------------|----------|----------|----------|----------| | | A merged with B | A merged with B | C | A | B | C | @@ -13,8 +11,12 @@ With footnote | R4 | | True | | True | False | False | | R4 | True | False | True | False | True | False | +With footnote + # Second slide title +A rectangle shape with this text inside. 
+ Let’s introduce a list With foo @@ -23,8 +25,6 @@ Bar And baz things -A rectangle shape with this text inside. - 1. List item4 2. List item5 3. List item6 diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.itxt b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.itxt new file mode 100644 index 00000000..0b489248 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.itxt @@ -0,0 +1,35 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: chapter: group slide-0 + item-2 at level 2: title: Test Table Slide + item-3 at level 2: table with [9x7] + item-4 at level 2: paragraph: With footnote + item-5 at level 1: chapter: group slide-1 + item-6 at level 2: title: Second slide title + item-7 at level 2: paragraph: A rectangle shape with this text inside. + item-8 at level 2: paragraph: Let’s introduce a list + item-9 at level 2: paragraph: With foo + item-10 at level 2: paragraph: Bar + item-11 at level 2: paragraph: And baz things + item-12 at level 1: chapter: group slide-2 + item-13 at level 2: ordered_list: group list + item-14 at level 3: list_item: List item4 + item-15 at level 3: list_item: List item5 + item-16 at level 3: list_item: List item6 + item-17 at level 2: list: group list + item-18 at level 3: list_item: I1 + item-19 at level 3: list_item: I2 + item-20 at level 3: list_item: I3 + item-21 at level 3: list_item: I4 + item-22 at level 2: paragraph: Some info: + item-23 at level 2: list: group list + item-24 at level 3: list_item: Item A + item-25 at level 3: list_item: Item B + item-26 at level 2: paragraph: Maybe a list? 
+ item-27 at level 2: ordered_list: group list + item-28 at level 3: list_item: List1 + item-29 at level 3: list_item: List2 + item-30 at level 3: list_item: List3 + item-31 at level 2: list: group list + item-32 at level 3: list_item: l1 + item-33 at level 3: list_item: l2 + item-34 at level 3: list_item: l3 \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.json b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.json new file mode 100644 index 00000000..e5ad68ed --- /dev/null +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.json @@ -0,0 +1,2230 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "powerpoint_sample_unordered", + "origin": { + "mimetype": "application/vnd.ms-powerpoint", + "binary_hash": 7336652625934663018, + "filename": "powerpoint_sample_unordered.pptx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + }, + { + "$ref": "#/groups/1" + }, + { + "$ref": "#/groups/2" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/tables/0" + }, + { + "$ref": "#/texts/1" + } + ], + "content_layer": "body", + "name": "slide-0", + "label": "chapter" + }, + { + "self_ref": "#/groups/1", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/2" + }, + { + "$ref": "#/texts/3" + }, + { + "$ref": "#/texts/4" + }, + { + "$ref": "#/texts/5" + }, + { + "$ref": "#/texts/6" + }, + { + "$ref": "#/texts/7" + }, + { + "$ref": "#/texts/8" + } + ], + "content_layer": "body", + "name": "slide-1", + "label": "chapter" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": 
"#/body" + }, + "children": [ + { + "$ref": "#/groups/3" + }, + { + "$ref": "#/groups/4" + }, + { + "$ref": "#/texts/16" + }, + { + "$ref": "#/groups/5" + }, + { + "$ref": "#/texts/19" + }, + { + "$ref": "#/groups/6" + }, + { + "$ref": "#/groups/7" + }, + { + "$ref": "#/texts/26" + } + ], + "content_layer": "body", + "name": "slide-2", + "label": "chapter" + }, + { + "self_ref": "#/groups/3", + "parent": { + "$ref": "#/groups/2" + }, + "children": [ + { + "$ref": "#/texts/9" + }, + { + "$ref": "#/texts/10" + }, + { + "$ref": "#/texts/11" + } + ], + "content_layer": "body", + "name": "list", + "label": "ordered_list" + }, + { + "self_ref": "#/groups/4", + "parent": { + "$ref": "#/groups/2" + }, + "children": [ + { + "$ref": "#/texts/12" + }, + { + "$ref": "#/texts/13" + }, + { + "$ref": "#/texts/14" + }, + { + "$ref": "#/texts/15" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + }, + { + "self_ref": "#/groups/5", + "parent": { + "$ref": "#/groups/2" + }, + "children": [ + { + "$ref": "#/texts/17" + }, + { + "$ref": "#/texts/18" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + }, + { + "self_ref": "#/groups/6", + "parent": { + "$ref": "#/groups/2" + }, + "children": [ + { + "$ref": "#/texts/20" + }, + { + "$ref": "#/texts/21" + }, + { + "$ref": "#/texts/22" + } + ], + "content_layer": "body", + "name": "list", + "label": "ordered_list" + }, + { + "self_ref": "#/groups/7", + "parent": { + "$ref": "#/groups/2" + }, + "children": [ + { + "$ref": "#/texts/23" + }, + { + "$ref": "#/texts/24" + }, + { + "$ref": "#/texts/25" + } + ], + "content_layer": "body", + "name": "list", + "label": "list" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "title", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 1524000.0, + "t": 1945640.0, + "r": 10668000.0, + "b": 1122363.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 
16 + ] + } + ], + "orig": "Test Table Slide", + "text": "Test Table Slide" + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 1524000.0, + "t": 5888420.0, + "r": 10668000.0, + "b": 5433848.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 13 + ] + } + ], + "orig": "With footnote", + "text": "With footnote" + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "title", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 838200.0, + "t": 1690688.0, + "r": 11353800.0, + "b": 365125.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 18 + ] + } + ], + "orig": "Second slide title", + "text": "Second slide title" + }, + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 6180463.0, + "t": 5221995.0, + "r": 10256704.0, + "b": 1344058.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 40 + ] + } + ], + "orig": "A rectangle shape with this text inside.", + "text": "A rectangle shape with this text inside." 
+ }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 838200.0, + "t": 3962400.0, + "r": 4461831.0, + "b": 1825625.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 50 + ] + } + ], + "orig": "Let’s introduce a list", + "text": "Let’s introduce a list" + }, + { + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 838200.0, + "t": 3962400.0, + "r": 4461831.0, + "b": 1825625.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 50 + ] + } + ], + "orig": "With foo", + "text": "With foo" + }, + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 838200.0, + "t": 3962400.0, + "r": 4461831.0, + "b": 1825625.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 50 + ] + } + ], + "orig": "Bar", + "text": "Bar" + }, + { + "self_ref": "#/texts/7", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 838200.0, + "t": 3962400.0, + "r": 4461831.0, + "b": 1825625.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 50 + ] + } + ], + "orig": "And baz things", + "text": "And baz things" + }, + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "furniture", + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 0.0, + "t": 0.0, + "r": 0.0, + "b": 0.0, + "coord_origin": "TOPLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "Some notes on the second slide.", + "text": "Some notes on the second slide." 
+ }, + { + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/groups/3" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 2423634.0, + "t": 3357995.0, + "r": 3928277.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 32 + ] + } + ], + "orig": "List item4", + "text": "List item4", + "enumerated": true, + "marker": "1." + }, + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/groups/3" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 2423634.0, + "t": 3357995.0, + "r": 3928277.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 32 + ] + } + ], + "orig": "List item5", + "text": "List item5", + "enumerated": true, + "marker": "2." + }, + { + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/groups/3" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 2423634.0, + "t": 3357995.0, + "r": 3928277.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 32 + ] + } + ], + "orig": "List item6", + "text": "List item6", + "enumerated": true, + "marker": "3." 
+ }, + { + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 4453634.0, + "t": 3657882.0, + "r": 5109583.0, + "b": 2457553.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ], + "orig": "I1", + "text": "I1", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 4453634.0, + "t": 3657882.0, + "r": 5109583.0, + "b": 2457553.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ], + "orig": "I2", + "text": "I2", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 4453634.0, + "t": 3657882.0, + "r": 5109583.0, + "b": 2457553.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ], + "orig": "I3", + "text": "I3", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 4453634.0, + "t": 3657882.0, + "r": 5109583.0, + "b": 2457553.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ], + "orig": "I4", + "text": "I4", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 5634940.0, + "t": 3380884.0, + "r": 6881050.0, + "b": 2457554.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 24 + ] + } + ], + "orig": "Some 
info:", + "text": "Some info:" + }, + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 5634940.0, + "t": 3380884.0, + "r": 6881050.0, + "b": 2457554.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 24 + ] + } + ], + "orig": "Item A", + "text": "Item A", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 5634940.0, + "t": 3380884.0, + "r": 6881050.0, + "b": 2457554.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 24 + ] + } + ], + "orig": "Item B", + "text": "Item B", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/19", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 7531336.0, + "t": 3659750.0, + "r": 9009626.0, + "b": 2459421.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "Maybe a list?", + "text": "Maybe a list?" + }, + { + "self_ref": "#/texts/20", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 7531336.0, + "t": 3659750.0, + "r": 9009626.0, + "b": 2459421.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "List1", + "text": "List1", + "enumerated": true, + "marker": "1." 
+ }, + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 7531336.0, + "t": 3659750.0, + "r": 9009626.0, + "b": 2459421.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "List2", + "text": "List2", + "enumerated": true, + "marker": "2." + }, + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/groups/6" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 7531336.0, + "t": 3659750.0, + "r": 9009626.0, + "b": 2459421.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "List3", + "text": "List3", + "enumerated": true, + "marker": "3." + }, + { + "self_ref": "#/texts/23", + "parent": { + "$ref": "#/groups/7" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 9404392.0, + "t": 3357995.0, + "r": 10060341.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 9 + ] + } + ], + "orig": "l1 ", + "text": "l1 ", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/24", + "parent": { + "$ref": "#/groups/7" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 9404392.0, + "t": 3357995.0, + "r": 10060341.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 9 + ] + } + ], + "orig": "l2", + "text": "l2", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/groups/7" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 9404392.0, + "t": 3357995.0, + "r": 10060341.0, + "b": 2434665.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 9 + ] + } + ], + 
"orig": "l3", + "text": "l3", + "enumerated": false, + "marker": "-" + }, + { + "self_ref": "#/texts/26", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "content_layer": "furniture", + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 0.0, + "t": 0.0, + "r": 0.0, + "b": 0.0, + "coord_origin": "TOPLEFT" + }, + "charspan": [ + 0, + 53 + ] + } + ], + "orig": "Final notes on the third slide.\nSecond line of notes.", + "text": "Final notes on the third slide.\nSecond line of notes." + } + ], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 2031999.0, + "t": 5283200.0, + "r": 10160000.0, + "b": 1945640.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 4, + "text": "Class1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 7, + "text": "Class2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "A merged with B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 
1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + 
"end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + 
"start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + 
"end_col_offset_idx": 2, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 9, + "num_cols": 7, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 4, + "text": "Class1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + 
"start_col_offset_idx": 1, + "end_col_offset_idx": 4, + "text": "Class1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 4, + "text": "Class1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 7, + "text": "Class2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 7, + "text": "Class2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 7, + "text": "Class2", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "A merged with B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 3, + "text": "A merged with B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + 
"start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "C", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + 
"end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", 
+ "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + 
"row_section": false + } + ], + [ + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 
1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + 
"end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "R4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + 
"start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "True", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "False", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "form_items": [], + "pages": { + "1": { + "size": { + "width": 12192000.0, + "height": 6858000.0 + }, + "page_no": 1 + }, + "2": { + "size": { + "width": 12192000.0, + "height": 6858000.0 + }, + "page_no": 2 + }, + "3": { + "size": { + "width": 12192000.0, + "height": 6858000.0 + }, + "page_no": 3 + } + } +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.md b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.md new file mode 100644 index 00000000..78625815 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/powerpoint_sample_unordered.pptx.md @@ -0,0 +1,50 @@ +# Test Table Slide + +| | Class1 | Class1 | Class1 | Class2 | Class2 | Class2 | +|----|-----------------|-----------------|----------|----------|----------|----------| +| | A merged with B | A merged with B | C | A | B | C | +| R1 | True | False | | False | True | True | +| R2 | | | True | False | | | +| R3 | False | | | | False | | +| R3 | | True | | True | | | +| R4 | | | False | | False | | +| R4 | | True | | 
True | False | False | +| R4 | True | False | True | False | True | False | + +With footnote + +# Second slide title + +A rectangle shape with this text inside. + +Let’s introduce a list + +With foo + +Bar + +And baz things + +1. List item4 +2. List item5 +3. List item6 + +- I1 +- I2 +- I3 +- I4 + +Some info: + +- Item A +- Item B + +Maybe a list? + +1. List1 +2. List2 +3. List3 + +- l1 +- l2 +- l3 \ No newline at end of file diff --git a/tests/data/pptx/powerpoint_sample_unordered.pptx b/tests/data/pptx/powerpoint_sample_unordered.pptx new file mode 100644 index 00000000..81e21e0d Binary files /dev/null and b/tests/data/pptx/powerpoint_sample_unordered.pptx differ