feat: use w:lastRenderedPageBreaks if present to get approximate pagination

Signed-off-by: David Huggins-Daines <dhdaines@logisphere.ca>
This commit is contained in:
David Huggins-Daines
2025-01-29 08:32:52 -05:00
parent b5da4080c9
commit 147c7a1bc9
6 changed files with 2841 additions and 175 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -93,7 +93,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
17
]
}
],
"orig": "Summer activities",
"text": "Summer activities"
},
@@ -117,7 +132,22 @@
}
],
"label": "title",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
20
]
}
],
"orig": "Swimming in the lake",
"text": "Swimming in the lake"
},
@@ -128,7 +158,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
4
]
}
],
"orig": "Duck",
"text": "Duck"
},
@@ -139,7 +184,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
33
]
}
],
"orig": "Figure 1: This is a cute duckling",
"text": "Figure 1: This is a cute duckling"
},
@@ -169,7 +229,22 @@
}
],
"label": "section_header",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
11
]
}
],
"orig": "Let\u2019s swim!",
"text": "Let\u2019s swim!",
"level": 1
@@ -181,7 +256,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
77
]
}
],
"orig": "To get started with swimming, first lay down in a water and try not to drown:",
"text": "To get started with swimming, first lay down in a water and try not to drown:"
},
@@ -192,7 +282,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
29
]
}
],
"orig": "You can relax and look around",
"text": "You can relax and look around",
"enumerated": false,
@@ -205,7 +310,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
12
]
}
],
"orig": "Paddle about",
"text": "Paddle about",
"enumerated": false,
@@ -218,7 +338,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
19
]
}
],
"orig": "Enjoy summer warmth",
"text": "Enjoy summer warmth",
"enumerated": false,
@@ -231,7 +366,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
19
]
}
],
"orig": "Also, don\u2019t forget:",
"text": "Also, don\u2019t forget:"
},
@@ -242,7 +392,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
15
]
}
],
"orig": "Wear sunglasses",
"text": "Wear sunglasses",
"enumerated": false,
@@ -255,7 +420,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
27
]
}
],
"orig": "Don\u2019t forget to drink water",
"text": "Don\u2019t forget to drink water",
"enumerated": false,
@@ -268,7 +448,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
13
]
}
],
"orig": "Use sun cream",
"text": "Use sun cream",
"enumerated": false,
@@ -281,7 +476,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
15
]
}
],
"orig": "Hmm, what else\u2026",
"text": "Hmm, what else\u2026"
},
@@ -314,7 +524,22 @@
}
],
"label": "section_header",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
9
]
}
],
"orig": "Let\u2019s eat",
"text": "Let\u2019s eat",
"level": 2
@@ -326,7 +551,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
85
]
}
],
"orig": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice",
"text": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice"
},
@@ -337,7 +577,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
20
]
}
],
"orig": "I like to eat leaves",
"text": "I like to eat leaves"
},
@@ -348,7 +603,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
61
]
}
],
"orig": "Here are some interesting things a respectful duck could eat:",
"text": "Here are some interesting things a respectful duck could eat:"
},
@@ -359,7 +629,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"orig": "",
"text": ""
},
@@ -370,7 +655,22 @@
},
"children": [],
"label": "paragraph",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
38
]
}
],
"orig": "And let\u2019s add another list in the end:",
"text": "And let\u2019s add another list in the end:"
},
@@ -381,7 +681,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
6
]
}
],
"orig": "Leaves",
"text": "Leaves",
"enumerated": false,
@@ -394,7 +709,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
7
]
}
],
"orig": "Berries",
"text": "Berries",
"enumerated": false,
@@ -407,7 +737,22 @@
},
"children": [],
"label": "list_item",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
5
]
}
],
"orig": "Grain",
"text": "Grain",
"enumerated": false,
@@ -422,7 +767,22 @@
},
"children": [],
"label": "picture",
"prov": [],
"prov": [
{
"page_no": 1,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 397.0,
"b": 397.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
0
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -446,7 +806,22 @@
},
"children": [],
"label": "table",
"prov": [],
"prov": [
{
"page_no": 2,
"bbox": {
"l": 0.0,
"t": 0.0,
"r": 0.0,
"b": 0.0,
"coord_origin": "TOPLEFT"
},
"charspan": [
0,
120
]
}
],
"captions": [],
"references": [],
"footnotes": [],
@@ -757,5 +1132,20 @@
}
],
"key_value_items": [],
"pages": {}
"pages": {
"1": {
"size": {
"width": 612.0,
"height": 792.0
},
"page_no": 1
},
"2": {
"size": {
"width": 612.0,
"height": 792.0
},
"page_no": 2
}
}
}