diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 6cfa0860..ee43caa9 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -127,6 +127,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): doc = DoclingDocument(name=self.file.stem or "file", origin=origin) if self.is_valid(): assert self.docx_obj is not None + doc = self._walk_linear(self.docx_obj.element.body, self.docx_obj, doc) return doc else: @@ -256,6 +257,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): elif drawing_blip: self._handle_pictures(docx_obj, drawing_blip, doc) + self._handle_text_elements(element, docx_obj, doc) # Check for the sdt containers, like table of contents elif tag_name in ["sdt"]: sdt_content = element.find(".//w:sdtContent", namespaces=namespaces) diff --git a/tests/data/docx/paragraph_in_image.docx b/tests/data/docx/paragraph_in_image.docx new file mode 100644 index 00000000..8795e408 Binary files /dev/null and b/tests/data/docx/paragraph_in_image.docx differ diff --git a/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.itxt b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.itxt new file mode 100644 index 00000000..4dd6a246 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.itxt @@ -0,0 +1,9 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: paragraph: Transkript + item-2 at level 1: paragraph: 5. März 2025, 01:35PM + item-3 at level 1: paragraph: + item-4 at level 1: picture + item-5 at level 1: inline: group group + item-6 at level 2: paragraph: User + item-7 at level 2: paragraph: 0:08 +Ein beispielhafter Paragraph. \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.json b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.json new file mode 100644 index 00000000..a365244a --- /dev/null +++ b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.json @@ -0,0 +1,162 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "paragraph_in_image", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "binary_hash": 15839552996279065250, + "filename": "paragraph_in_image.docx" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/texts/2" + }, + { + "$ref": "#/pictures/0" + }, + { + "$ref": "#/groups/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/3" + }, + { + "$ref": "#/texts/4" + } + ], + "content_layer": "body", + "name": "group", + "label": "inline" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Transkript", + "text": "Transkript", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "5. M\u00e4rz 2025, 01:35PM", + "text": "5. M\u00e4rz 2025, 01:35PM" + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "User", + "text": "User", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "0:08\nEin beispielhafter Paragraph.", + "text": "0:08\nEin beispielhafter Paragraph." + } + ], + "pictures": [ + { + "self_ref": "#/pictures/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "picture", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "image": { + "mimetype": "image/png", + "dpi": 72, + "size": { + "width": 100.0, + "height": 100.0 + }, + "uri": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAYAAABw4pVUAAALp0lEQVR4nO2de2wU1xXGv3vHD2xj7A0GA4YYKA+X8MjDMdAWUhWUJinGQJAKCkilUigUUlGiVCmIUJryR5WIVhGPQpoqKbQkVQs20IIa0hZDxSMuDYESoDTYQMAYGz/AGK+ZudW5OwYbz6x3dmfG6539SSslzMzO9f32vs499xyGKKdsVlZ/aC1jNLAvC2AYY3gYAgMAZAHIBJAKIFG/vQXAbQB1AKrBcEUIXGTAeQ7xGXjiyfwd1VcRxTBEGWWF6XkaT/gGoE0CYxMADLb5FeUQ4gjAD3Lt7t/yd988gygiKgQpK+o1XnBlphCiEMAol19/mjG2m2nqzvyShqPwqiBlhelZGuffAWPzAIxDdHACQmzjmvZu/u6b1Z4QpKzIN0blWMKEWBgtLdQAIRjbomjYkF9SexIuwlwVgolXGDAf3QgBbFUEe8MtYZgbXZOqKKsZsBTdGAGsV1R1jdNdmaOCHCvyfR9MrNWnp7FAHQRbWVBSu7FbCVL2fHqeUPk6AfYsYhAGsZcp2vL8P9k/ZbZdkGMzM16EYOsBJCG28YOJpQU769+OWkGOzvT9ignxPXgIwdjm8TtrF0WVIGXTfA9ritgKhsnwIgKlXGXz8/fUXuxyQWiVrTL+AQNy4WEEUKEI7duRrvYjEqRsRsbTGtgOAGmRfE8M0cghZuUX1//VdUHKZvm+JTSxW0TvartLYDSscFaYv6P2z2E+H17LEGD74mIEEQXimXBaCgtnzNAY/yjeTXVKIxfaFKtjCrM6m1ITRKnXB3BLA/1dNtnK7IuH/O0AaGobFyN0qK7kcsAC3Mqiz7PrjEhgmCzrLuTbQzeHbImoYF6HiYWhmFlYKIZCTVVOeMA25TR+rqjjOjNIdtplkdU2LoYtJOl1ibAFof2MWDWhdwVUl3KPKJwuSzohKMp/Y2hzKVqo46o63Gzn0bSF0LZrXAxHyNTrNvQWQg4JGhOfOlOeOAQXbKyR44RhCyHvEKN/j2MfZnXcoYXEW0fXtpIOLYSc2Fwsk6dRDeqaGcysqpx2D+o9eTYyC55D6uDRSPRlgyengikJ928QApr/DrTm2/BXf4HGz0+g9vAu1B/fb/h9I9cUo9fYpwyvNV06i1M/IJ/t0LDzu0JAcFXt23bG1aYWAOlr65AYSkpP9H9+ObKmvIDEjD4AC/IaxsCTU+QnoVdvpA4diz5T5uH6h++hfNMPEUMwvc7fNBREd3y2nfTRX0Pui28gZdDI4EIEg57jCmKOQJ2/2WEMoY0nJ7zQMx6fiqHLNiPl4bzwxYhtxul1314QOp/hRDeVM3cFknrTgac4ZrSt+3tdln5Yxlb6z34ZqUNGB7+JBnAavG9cheZvlv/Ek5KR6OsHpUeaJ1qVXvev3hNEHiNz4ORS+qivgCmtx/860nytHJUl61G19x3D6ymDRqL31+fIGVlKznDEMKNIAzpeJwUJnOkTtr4hfdREJPUZaHq9pf46Lr27CrVH9pje03TpLC5vXSM/fabObz81jjECGiAgiDxgafNsV0nLAEswbx1NF88EFeNBru+3tDXdDSENsDEwqAdOu9o+oPPEZNPryVk56DFgmN2v7b7oGnB5Dtz+o8doqa2C2tRoej0pOxcD562SwsWRDCYtOB3KhwM0nCyFeqvW9DrjCnwTCpH3sz3IeGyKE0XofmgtYzhFSHDq+2+eOQoIzfwGxpA6dByGr/i9FMY3YRq8DGnBKVyFUy+o/Wcx/DWdR7JgCUlIf+SrGPaj32LspuPoP2uZJ7sy0oLL2CEOQd1W5e6NUG83hPYAY0juNwQD572GMRv/JYXxEqQF1wO5OMa1XRtRuWsj1KZblkqWmNlXCvPIulLvjDECA7geVcdRrnzwc1RsfhnNlRekqSRkGEPqkDEYuvxt9P3mAniALO6WZ0nNgT/gP8sn4+qOX6KlrsqSMAk9fRg4f7UXRMnkerwpV6Bu6/K2n+KTBSNx6b3XpGlEaGrIK/8+z3zXcDHpr7mCGCGVBDG3bzgIGRVpO/Ts6hlya1ZrudPpMz1yhsE3saNRWqh3TZ+hXce0YY+jm5DY5da6m6cOyQ8N3IMWrEXKwBGmJnee2AM9Rzxp6fvJfEOtK2SYuXet2lgPp+F6WLwup/7fH+HcmlloOHUw6PiSnN3xvBCNSeKu3/B+JSVdtqxQoO6QbGxmqHcszBTDo4XrMQqjAn/NFVTv34q7wX6JBvvqzZXlEHdbTLus9Lx7O6RB6TXuKekFY4jQ0Hz1czjMba4HjLQd6rezCxdbXnG31FZBa26y9Ezz1f+Zi8gY0sdO7tQsQ+X0TZwOTruUBlCZmi45Hp6xjgRxJP4T/YH9il6SCzvaXAqVtBFPIKGXz1I/fvP0YTRfqzB9htyOcl5YJb1fzMqau2idNN8Ea70NJw7AYaoTKJSqzZuF7SBTSO7iXyB7+hLUfbwXNf94X053jSole/oSZE9bJAdvQ4TAnS/OGV6qP/4h0oY/ZvosTRaGr9iOmycPynvV5kBP3TNvPDKfeDrgiGG2fy80NJw6hDtXzsNRGK4k6HFtnX0PV+T+OH36zXgJWtMt+G9U3puuKilpSHqoP1iQDS2CuqWGT41/pbWHd6P3pNlIyTV3DSDRMwuelR8r+G9ck4ZSpyEtEijIMFyExKFpaIqVqSghBG59dgQ1pX+EEfTrrdr3jlzRK6m9YBe0Pqr5+3ZpKHUaGfBZRnzuBtyuOI3LW38S9J6qfb+RJppQFpmhQFaEG4eKcfl3r8MNSAtO4bcRzQiBxnNlqNi0zHDseZCKLa+gsmSDNeuyAeTsTe5JF95aDNfgiSfl8HFsRuYFu/fVaZGVM/fH0pU0rC5ECDmzqdr7a2mQtAqt/HPmvIrULz1qyX2IWkVT+Slc3va6XKy6SHlBcd2QgCBFGdvB2Bynjx+kDMpD4kP9wZN6yE9bREsz1OamwPGD88eDHj+wuh7KmjoPPUcWICkrRw7sbQVq+95bZ4+hev82+X7XEeL9gpL6uaxNONcN7pcizj0EW0LhZ6UljbIE3L8Spyto1UAKoqdsON0lJYlDnG5Nm3HP1kwpG1r/O467tK37+4Jo6k6XyxHHoO7bWU2Ozcj8JIpyeXiFEwXFdY+2/k/77TEhtnVFiTyNaF/n7QShzDIBB7o4LiH0OjcWhM5LU2YZt0rjdQRjWx6MCtRhR5/S/LhaKg+jGNR1B0Eo9gal+XGtVB5FAFtDjgZEOZdcKZWHUUzq2FAQvZVQUpY4DkB1a5ZkzNQrjBJgOeWR4nHq9LqFJUHk6C/YSseK5VUEWxks01un/g0fz8j4SzwyqX3JxJ4srn8uori9lI1MJsCKEyl+vS4RkSAyEjMT3TopZFTAxNJQ0uyFFIyfYpZTNjJbCuZBBGObQ02vZ8lH7lhR5oF4hgSLCJQWlNQZxwyMNH8IpYajJCVWy+RVBFBBdWblGUuCUKYYSg1H6Xwsl857NMo0ehZzG1oShKCcSpQajhJfWX3WKzAyq1P6vDByGloWhKDsY5QaLi5KkLR5YeYyDEsQgvL0UWq4ePfVjkaZLi/MHIYRCULQr4BSw8UHegQGcEqTF0GWz4gFIaifpNRwNL2DVxEolenxIsyDS8TTd0dZ+u6IW0hbZMGYWOgR25ef/lY7xbBdEIJMBJSNjCybiFEYxF76G0M1h1j7bgfRverXxlDqpDrazyAvdade4HjYaEqBQTmXGLC0u2+7Kqq6Jtjmkh24FsebMvdQmh8GWLLtRIN3CDkkmO2B243rgdWlMBxLmJCDf7QGdhfkxEZ+U24J0UqXVYjM5kPJTAL5M6LFwfsE+dqSe6fTXZMZUfELpfwZlLJBBLIE2J4UoBNO0/kMOhJgx8IuJgRpSxllapCB6bVJevhtu6Nul0OIIwA/SMfIWk8uRQtRJwgeQIZC11rGUJBhimsrw9oGIqlS8E6aTlOIwtaoeBSjiYKYkD9ZNcUO0UOHnJcBEnjiyfwd1Z0HEu5C/g+9jIOcGKpmBAAAAABJRU5ErkJggg==" + }, + "annotations": [] + } + ], + "tables": [], + "key_value_items": [], + "form_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.md b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.md new file mode 100644 index 00000000..034f3bfd --- /dev/null +++ b/tests/data/groundtruth/docling_v2/paragraph_in_image.docx.md @@ -0,0 +1,8 @@ +**Transkript** + +5. März 2025, 01:35PM + + + +**User** 0:08 +Ein beispielhafter Paragraph. \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.itxt b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.itxt index 220b5533..346093c2 100644 --- a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.itxt +++ b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.itxt @@ -2,7 +2,10 @@ item-0 at level 0: unspecified: group _root_ item-1 at level 1: paragraph: Test with three images in unusual formats item-2 at level 1: paragraph: Raster in emf: item-3 at level 1: picture - item-4 at level 1: paragraph: Vector in emf: - item-5 at level 1: picture - item-6 at level 1: paragraph: Raster in webp: - item-7 at level 1: picture \ No newline at end of file + item-4 at level 1: paragraph: + item-5 at level 1: paragraph: Vector in emf: + item-6 at level 1: picture + item-7 at level 1: paragraph: + item-8 at level 1: paragraph: Raster in webp: + item-9 at level 1: picture + item-10 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json index bb8807f6..98f2abc1 100644 --- a/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json +++ b/tests/data/groundtruth/docling_v2/test_emf_docx.docx.json @@ -29,14 +29,23 @@ { "$ref": "#/texts/2" }, - { - "$ref": "#/pictures/1" - }, { "$ref": "#/texts/3" }, + { + "$ref": "#/pictures/1" + }, + { + "$ref": "#/texts/4" + }, + { + "$ref": "#/texts/5" + }, { "$ref": "#/pictures/2" + }, + { + "$ref": "#/texts/6" } ], "content_layer": "body", @@ -78,8 +87,8 @@ "content_layer": "body", "label": "paragraph", "prov": [], - "orig": "Vector in emf:", - "text": "Vector in emf:" + "orig": "", + "text": "" }, { "self_ref": "#/texts/3", @@ -90,8 +99,44 @@ "content_layer": "body", "label": "paragraph", "prov": [], + "orig": "Vector in emf:", + "text": "Vector in emf:" + }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], "orig": "Raster in webp:", "text": "Raster in webp:" + }, + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" } ], "pictures": [ diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt index ce60ad26..b4d98b44 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt @@ -3,27 +3,28 @@ item-0 at level 0: unspecified: group _root_ item-2 at level 1: title: Swimming in the lake item-3 at level 2: paragraph: Duck item-4 at level 2: picture - item-5 at level 2: paragraph: Figure 1: This is a cute duckling - item-6 at level 2: section_header: Let’s swim! - item-7 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown: - item-8 at level 3: list: group list - item-9 at level 4: list_item: You can relax and look around - item-10 at level 4: list_item: Paddle about - item-11 at level 4: list_item: Enjoy summer warmth - item-12 at level 3: paragraph: Also, don’t forget: - item-13 at level 3: list: group list - item-14 at level 4: list_item: Wear sunglasses - item-15 at level 4: list_item: Don’t forget to drink water - item-16 at level 4: list_item: Use sun cream - item-17 at level 3: paragraph: Hmm, what else… - item-18 at level 3: section_header: Let’s eat - item-19 at level 4: paragraph: After we had a good day of swimm ... , it’s important to eat something nice - item-20 at level 4: paragraph: I like to eat leaves - item-21 at level 4: paragraph: Here are some interesting things a respectful duck could eat: - item-22 at level 4: table with [4x3] - item-23 at level 4: paragraph: - item-24 at level 4: paragraph: And let’s add another list in the end: - item-25 at level 4: list: group list - item-26 at level 5: list_item: Leaves - item-27 at level 5: list_item: Berries - item-28 at level 5: list_item: Grain \ No newline at end of file + item-5 at level 2: paragraph: + item-6 at level 2: paragraph: Figure 1: This is a cute duckling + item-7 at level 2: section_header: Let’s swim! + item-8 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown: + item-9 at level 3: list: group list + item-10 at level 4: list_item: You can relax and look around + item-11 at level 4: list_item: Paddle about + item-12 at level 4: list_item: Enjoy summer warmth + item-13 at level 3: paragraph: Also, don’t forget: + item-14 at level 3: list: group list + item-15 at level 4: list_item: Wear sunglasses + item-16 at level 4: list_item: Don’t forget to drink water + item-17 at level 4: list_item: Use sun cream + item-18 at level 3: paragraph: Hmm, what else… + item-19 at level 3: section_header: Let’s eat + item-20 at level 4: paragraph: After we had a good day of swimm ... , it’s important to eat something nice + item-21 at level 4: paragraph: I like to eat leaves + item-22 at level 4: paragraph: Here are some interesting things a respectful duck could eat: + item-23 at level 4: table with [4x3] + item-24 at level 4: paragraph: + item-25 at level 4: paragraph: And let’s add another list in the end: + item-26 at level 4: list: group list + item-27 at level 5: list_item: Leaves + item-28 at level 5: list_item: Berries + item-29 at level 5: list_item: Grain \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.json b/tests/data/groundtruth/docling_v2/word_sample.docx.json index 1d305cbc..355ac741 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.json +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.json @@ -32,17 +32,17 @@ { "self_ref": "#/groups/0", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [ - { - "$ref": "#/texts/6" - }, { "$ref": "#/texts/7" }, { "$ref": "#/texts/8" + }, + { + "$ref": "#/texts/9" } ], "content_layer": "body", @@ -52,17 +52,17 @@ { "self_ref": "#/groups/1", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [ - { - "$ref": "#/texts/10" - }, { "$ref": "#/texts/11" }, { "$ref": "#/texts/12" + }, + { + "$ref": "#/texts/13" } ], "content_layer": "body", @@ -72,17 +72,17 @@ { "self_ref": "#/groups/2", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [ - { - "$ref": "#/texts/20" - }, { "$ref": "#/texts/21" }, { "$ref": "#/texts/22" + }, + { + "$ref": "#/texts/23" } ], "content_layer": "body", @@ -120,6 +120,9 @@ }, { "$ref": "#/texts/4" + }, + { + "$ref": "#/texts/5" } ], "content_layer": "body", @@ -149,32 +152,44 @@ "content_layer": "body", "label": "paragraph", "prov": [], - "orig": "Figure 1: This is a cute duckling", - "text": "Figure 1: This is a cute duckling" + "orig": "", + "text": "" }, { "self_ref": "#/texts/4", "parent": { "$ref": "#/texts/1" }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Figure 1: This is a cute duckling", + "text": "Figure 1: This is a cute duckling" + }, + { + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/texts/1" + }, "children": [ { - "$ref": "#/texts/5" + "$ref": "#/texts/6" }, { "$ref": "#/groups/0" }, { - "$ref": "#/texts/9" + "$ref": "#/texts/10" }, { "$ref": "#/groups/1" }, { - "$ref": "#/texts/13" + "$ref": "#/texts/14" }, { - "$ref": "#/texts/14" + "$ref": "#/texts/15" } ], "content_layer": "body", @@ -185,9 +200,9 @@ "level": 1 }, { - "self_ref": "#/texts/5", + "self_ref": "#/texts/6", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [], "content_layer": "body", @@ -197,7 +212,7 @@ "text": "To get started with swimming, first lay down in a water and try not to drown:" }, { - "self_ref": "#/texts/6", + "self_ref": "#/texts/7", "parent": { "$ref": "#/groups/0" }, @@ -211,7 +226,7 @@ "marker": "-" }, { - "self_ref": "#/texts/7", + "self_ref": "#/texts/8", "parent": { "$ref": "#/groups/0" }, @@ -225,7 +240,7 @@ "marker": "-" }, { - "self_ref": "#/texts/8", + "self_ref": "#/texts/9", "parent": { "$ref": "#/groups/0" }, @@ -239,9 +254,9 @@ "marker": "-" }, { - "self_ref": "#/texts/9", + "self_ref": "#/texts/10", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [], "content_layer": "body", @@ -251,7 +266,7 @@ "text": "Also, don\u2019t forget:" }, { - "self_ref": "#/texts/10", + "self_ref": "#/texts/11", "parent": { "$ref": "#/groups/1" }, @@ -265,7 +280,7 @@ "marker": "-" }, { - "self_ref": "#/texts/11", + "self_ref": "#/texts/12", "parent": { "$ref": "#/groups/1" }, @@ -279,7 +294,7 @@ "marker": "-" }, { - "self_ref": "#/texts/12", + "self_ref": "#/texts/13", "parent": { "$ref": "#/groups/1" }, @@ -293,9 +308,9 @@ "marker": "-" }, { - "self_ref": "#/texts/13", + "self_ref": "#/texts/14", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [], "content_layer": "body", @@ -305,29 +320,29 @@ "text": "Hmm, what else\u2026" }, { - "self_ref": "#/texts/14", + "self_ref": "#/texts/15", "parent": { - "$ref": "#/texts/4" + "$ref": "#/texts/5" }, "children": [ - { - "$ref": "#/texts/15" - }, { "$ref": "#/texts/16" }, { "$ref": "#/texts/17" }, - { - "$ref": "#/tables/0" - }, { "$ref": "#/texts/18" }, + { + "$ref": "#/tables/0" + }, { "$ref": "#/texts/19" }, + { + "$ref": "#/texts/20" + }, { "$ref": "#/groups/2" } @@ -340,9 +355,9 @@ "level": 2 }, { - "self_ref": "#/texts/15", + "self_ref": "#/texts/16", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body", @@ -352,9 +367,9 @@ "text": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice" }, { - "self_ref": "#/texts/16", + "self_ref": "#/texts/17", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body", @@ -364,9 +379,9 @@ "text": "I like to eat leaves" }, { - "self_ref": "#/texts/17", + "self_ref": "#/texts/18", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body", @@ -376,9 +391,9 @@ "text": "Here are some interesting things a respectful duck could eat:" }, { - "self_ref": "#/texts/18", + "self_ref": "#/texts/19", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body", @@ -388,9 +403,9 @@ "text": "" }, { - "self_ref": "#/texts/19", + "self_ref": "#/texts/20", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body", @@ -400,7 +415,7 @@ "text": "And let\u2019s add another list in the end:" }, { - "self_ref": "#/texts/20", + "self_ref": "#/texts/21", "parent": { "$ref": "#/groups/2" }, @@ -414,7 +429,7 @@ "marker": "-" }, { - "self_ref": "#/texts/21", + "self_ref": "#/texts/22", "parent": { "$ref": "#/groups/2" }, @@ -428,7 +443,7 @@ "marker": "-" }, { - "self_ref": "#/texts/22", + "self_ref": "#/texts/23", "parent": { "$ref": "#/groups/2" }, @@ -471,7 +486,7 @@ { "self_ref": "#/tables/0", "parent": { - "$ref": "#/texts/14" + "$ref": "#/texts/15" }, "children": [], "content_layer": "body",