feat: Rich tables for MSWord backend (#2291)

* Adding support of rich table cells to MSWord backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Fixes for properly accounting lists, pictures and headers in rich table cells

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up msword backend, re-generated docx tests

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Added detection of simple table cells in word backend

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Cleaned up

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

---------

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maxim Lysak
2025-09-22 16:41:59 +02:00
committed by GitHub
parent 46efaaefee
commit e2482a2ada
27 changed files with 1103 additions and 787 deletions

View File

@@ -1,40 +1,40 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: inline: group group
item-2 at level 2: paragraph: This is a word document and this is an inline equation:
item-2 at level 2: text: This is a word document and this is an inline equation:
item-3 at level 2: formula: A= \pi r^{2}
item-4 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-5 at level 1: paragraph:
item-4 at level 2: text: . If instead, I want an equation by line, I can do this:
item-5 at level 1: text:
item-6 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23
item-7 at level 1: paragraph: And that is an equation by itself. Cheers!
item-8 at level 1: paragraph:
item-9 at level 1: paragraph: This is another equation:
item-7 at level 1: text: And that is an equation by itself. Cheers!
item-8 at level 1: text:
item-9 at level 1: text: This is another equation:
item-10 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right)
item-11 at level 1: paragraph:
item-12 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
item-13 at level 1: paragraph:
item-14 at level 1: paragraph:
item-11 at level 1: text:
item-12 at level 1: text: This is text. This is text. This ... s is text. This is text. This is text.
item-13 at level 1: text:
item-14 at level 1: text:
item-15 at level 1: inline: group group
item-16 at level 2: paragraph: This is a word document and this is an inline equation:
item-16 at level 2: text: This is a word document and this is an inline equation:
item-17 at level 2: formula: A= \pi r^{2}
item-18 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-19 at level 1: paragraph:
item-18 at level 2: text: . If instead, I want an equation by line, I can do this:
item-19 at level 1: text:
item-20 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}
item-21 at level 1: paragraph:
item-22 at level 1: paragraph: And that is an equation by itself. Cheers!
item-23 at level 1: paragraph:
item-24 at level 1: paragraph: This is another equation:
item-25 at level 1: paragraph:
item-21 at level 1: text:
item-22 at level 1: text: And that is an equation by itself. Cheers!
item-23 at level 1: text:
item-24 at level 1: text: This is another equation:
item-25 at level 1: text:
item-26 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ght)x^{2}}{2!}+ \text{ \textellipsis }
item-27 at level 1: paragraph:
item-28 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text.
item-29 at level 1: paragraph:
item-30 at level 1: paragraph:
item-27 at level 1: text:
item-28 at level 1: text: This is text. This is text. This ... s is text. This is text. This is text.
item-29 at level 1: text:
item-30 at level 1: text:
item-31 at level 1: inline: group group
item-32 at level 2: paragraph: This is a word document and this is an inline equation:
item-32 at level 2: text: This is a word document and this is an inline equation:
item-33 at level 2: formula: A= \pi r^{2}
item-34 at level 2: paragraph: . If instead, I want an equation by line, I can do this:
item-35 at level 1: paragraph:
item-34 at level 2: text: . If instead, I want an equation by line, I can do this:
item-35 at level 1: text:
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
item-37 at level 1: paragraph:
item-38 at level 1: paragraph: And that is an equation by itself. Cheers!
item-39 at level 1: paragraph:
item-37 at level 1: text:
item-38 at level 1: text: And that is an equation by itself. Cheers!
item-39 at level 1: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -182,7 +182,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
@@ -206,7 +206,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
@@ -218,7 +218,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -242,7 +242,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
@@ -261,7 +261,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -273,7 +273,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:",
@@ -304,7 +304,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -316,7 +316,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
@@ -335,7 +335,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -347,7 +347,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -359,7 +359,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
@@ -383,7 +383,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
@@ -395,7 +395,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -419,7 +419,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -431,7 +431,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
@@ -450,7 +450,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -462,7 +462,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:",
@@ -481,7 +481,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -505,7 +505,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -517,7 +517,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
@@ -536,7 +536,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -548,7 +548,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -560,7 +560,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
@@ -584,7 +584,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
@@ -596,7 +596,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -620,7 +620,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -632,7 +632,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
@@ -651,7 +651,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,10 +1,10 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: Lorem ipsum dolor sit amet, cons ... quam non, sodales sem. Nulla facilisi.
item-2 at level 1: paragraph:
item-3 at level 1: paragraph: Duis condimentum dui eget ullamc ... cus tempor, et tristique ante aliquet.
item-4 at level 1: paragraph:
item-5 at level 1: paragraph: Maecenas id neque pharetra, elei ... ulla faucibus eu. Donec ut nisl metus.
item-6 at level 1: paragraph:
item-7 at level 1: paragraph: Duis ac tellus sed turpis feugia ... pellentesque rhoncus, blandit eu nisl.
item-8 at level 1: paragraph:
item-9 at level 1: paragraph: Nunc vehicula mattis erat ac con ... udin, vehicula turpis eu, tempus nibh.
item-1 at level 1: text: Lorem ipsum dolor sit amet, cons ... quam non, sodales sem. Nulla facilisi.
item-2 at level 1: text:
item-3 at level 1: text: Duis condimentum dui eget ullamc ... cus tempor, et tristique ante aliquet.
item-4 at level 1: text:
item-5 at level 1: text: Maecenas id neque pharetra, elei ... ulla faucibus eu. Donec ut nisl metus.
item-6 at level 1: text:
item-7 at level 1: text: Duis ac tellus sed turpis feugia ... pellentesque rhoncus, blandit eu nisl.
item-8 at level 1: text:
item-9 at level 1: text: Nunc vehicula mattis erat ac con ... udin, vehicula turpis eu, tempus nibh.

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "lorem_ipsum",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -58,7 +58,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
@@ -77,7 +77,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -89,7 +89,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
"text": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
@@ -108,7 +108,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -120,7 +120,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
"text": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
@@ -139,7 +139,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -151,7 +151,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
"text": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
@@ -170,7 +170,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -182,7 +182,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
"text": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",

View File

@@ -1,3 +1,3 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: table with [2x2]
item-2 at level 1: paragraph:
item-2 at level 1: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "table_with_equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -37,7 +37,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -69,7 +69,8 @@
"text": "The next cell has an equation",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -81,7 +82,8 @@
"text": "$A= \\pi r^{2}$",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -93,7 +95,8 @@
"text": "The next cell has another equation",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -105,7 +108,8 @@
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 2,
@@ -122,7 +126,8 @@
"text": "The next cell has an equation",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -134,7 +139,8 @@
"text": "$A= \\pi r^{2}$",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -148,7 +154,8 @@
"text": "The next cell has another equation",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -160,7 +167,8 @@
"text": "$x=\\frac{-b \\pm \\sqrt{b^{2}-4ac}}{2a}$",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -2,9 +2,9 @@ item-0 at level 0: unspecified: group _root_
item-1 at level 1: list: group list
item-2 at level 2: list_item: Hello world1
item-3 at level 2: list_item: Hello2
item-4 at level 1: paragraph:
item-5 at level 1: paragraph: Some text before
item-4 at level 1: text:
item-5 at level 1: text: Some text before
item-6 at level 1: table with [3x3]
item-7 at level 1: paragraph:
item-8 at level 1: paragraph:
item-9 at level 1: paragraph: Some text after
item-7 at level 1: text:
item-8 at level 1: text:
item-9 at level 1: text: Some text after

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "tablecell",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -112,7 +112,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -124,7 +124,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Some text before",
"text": "Some text before",
@@ -143,7 +143,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -155,7 +155,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -167,7 +167,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Some text after",
"text": "Some text after",
@@ -206,7 +206,8 @@
"text": "Tab1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -218,7 +219,8 @@
"text": "Tab2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -230,7 +232,8 @@
"text": "Tab3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -242,7 +245,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -254,7 +258,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -266,7 +271,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -278,7 +284,8 @@
"text": "D",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -290,7 +297,8 @@
"text": "E",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -302,7 +310,8 @@
"text": "F",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 3,
@@ -319,7 +328,8 @@
"text": "Tab1",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -331,7 +341,8 @@
"text": "Tab2",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -343,7 +354,8 @@
"text": "Tab3",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -357,7 +369,8 @@
"text": "A",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -369,7 +382,8 @@
"text": "B",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -381,7 +395,8 @@
"text": "C",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -395,7 +410,8 @@
"text": "D",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -407,7 +423,8 @@
"text": "E",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -419,7 +436,8 @@
"text": "F",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -1,8 +1,8 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: Test with three images in unusual formats
item-2 at level 1: paragraph: Raster in emf:
item-1 at level 1: text: Test with three images in unusual formats
item-2 at level 1: text: Raster in emf:
item-3 at level 1: picture
item-4 at level 1: paragraph: Vector in emf:
item-4 at level 1: text: Vector in emf:
item-5 at level 1: picture
item-6 at level 1: paragraph: Raster in webp:
item-6 at level 1: text: Raster in webp:
item-7 at level 1: picture

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "test_emf_docx",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -52,7 +52,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Test with three images in unusual formats",
"text": "Test with three images in unusual formats",
@@ -71,7 +71,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Raster in emf:",
"text": "Raster in emf:",
@@ -90,7 +90,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Vector in emf:",
"text": "Vector in emf:",
@@ -109,7 +109,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Raster in webp:",
"text": "Raster in webp:",

View File

@@ -1,90 +1,90 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: Chiayi County Shuishang Township ... mentary School Affiliated Kindergarten
item-2 at level 1: paragraph: Infectious Disease Reporting Pro ... r the 113th Academic Year Kindergarten
item-3 at level 1: paragraph:
item-1 at level 1: text: Chiayi County Shuishang Township ... mentary School Affiliated Kindergarten
item-2 at level 1: text: Infectious Disease Reporting Pro ... r the 113th Academic Year Kindergarten
item-3 at level 1: text:
item-4 at level 1: section: group textbox
item-5 at level 2: paragraph: Student falls ill
item-6 at level 2: paragraph:
item-5 at level 2: text: Student falls ill
item-6 at level 2: text:
item-7 at level 2: list: group list
item-8 at level 3: list_item: Suggested Reportable Symptoms:
... sh
Blisters
Headache
Sore throat
item-9 at level 1: paragraph:
item-10 at level 1: paragraph:
item-9 at level 1: text:
item-10 at level 1: text:
item-11 at level 1: section: group textbox
item-12 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms
item-13 at level 1: paragraph:
item-14 at level 1: paragraph:
item-15 at level 1: paragraph:
item-16 at level 1: paragraph:
item-12 at level 2: text: If a caregiver suspects that wit ... the same suggested reportable symptoms
item-13 at level 1: text:
item-14 at level 1: text:
item-15 at level 1: text:
item-16 at level 1: text:
item-17 at level 1: section: group textbox
item-18 at level 2: paragraph: Yes
item-19 at level 1: paragraph:
item-20 at level 1: paragraph:
item-18 at level 2: text: Yes
item-19 at level 1: text:
item-20 at level 1: text:
item-21 at level 1: section: group textbox
item-22 at level 2: list: group list
item-23 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
item-24 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
item-25 at level 2: paragraph:
item-25 at level 2: text:
item-26 at level 1: list: group list
item-27 at level 1: paragraph:
item-28 at level 1: paragraph:
item-29 at level 1: paragraph:
item-30 at level 1: paragraph:
item-31 at level 1: paragraph:
item-27 at level 1: text:
item-28 at level 1: text:
item-29 at level 1: text:
item-30 at level 1: text:
item-31 at level 1: text:
item-32 at level 1: section: group textbox
item-33 at level 2: paragraph: Health Bureau:
item-34 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control.
item-33 at level 2: text: Health Bureau:
item-34 at level 2: text: Upon receiving a report from the ... rt to the Centers for Disease Control.
item-35 at level 2: list: group list
item-36 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
item-37 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
item-38 at level 2: paragraph:
item-38 at level 2: text:
item-39 at level 1: list: group list
item-40 at level 1: paragraph:
item-40 at level 1: text:
item-41 at level 1: section: group textbox
item-42 at level 2: paragraph: Department of Education:
item-42 at level 2: text: Department of Education:
Collabo ... vention measures at all school levels.
item-43 at level 1: paragraph:
item-44 at level 1: paragraph:
item-45 at level 1: paragraph:
item-46 at level 1: paragraph:
item-47 at level 1: paragraph:
item-48 at level 1: paragraph:
item-49 at level 1: paragraph:
item-43 at level 1: text:
item-44 at level 1: text:
item-45 at level 1: text:
item-46 at level 1: text:
item-47 at level 1: text:
item-48 at level 1: text:
item-49 at level 1: text:
item-50 at level 1: section: group textbox
item-51 at level 2: inline: group group
item-52 at level 3: paragraph: The Health Bureau will handle
item-53 at level 3: paragraph: reporting and specimen collection
item-54 at level 3: paragraph: .
item-55 at level 2: paragraph:
item-56 at level 1: paragraph:
item-57 at level 1: paragraph:
item-58 at level 1: paragraph:
item-52 at level 3: text: The Health Bureau will handle
item-53 at level 3: text: reporting and specimen collection
item-54 at level 3: text: .
item-55 at level 2: text:
item-56 at level 1: text:
item-57 at level 1: text:
item-58 at level 1: text:
item-59 at level 1: section: group textbox
item-60 at level 2: paragraph: Whether the epidemic has eased.
item-61 at level 2: paragraph:
item-62 at level 1: paragraph:
item-60 at level 2: text: Whether the epidemic has eased.
item-61 at level 2: text:
item-62 at level 1: text:
item-63 at level 1: section: group textbox
item-64 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
item-65 at level 2: paragraph: No
item-66 at level 1: paragraph:
item-67 at level 1: paragraph:
item-64 at level 2: text: Whether the test results are pos ... legally designated infectious disease.
item-65 at level 2: text: No
item-66 at level 1: text:
item-67 at level 1: text:
item-68 at level 1: section: group textbox
item-69 at level 2: paragraph: Yes
item-70 at level 1: paragraph:
item-69 at level 2: text: Yes
item-70 at level 1: text:
item-71 at level 1: section: group textbox
item-72 at level 2: paragraph: Yes
item-73 at level 1: paragraph:
item-74 at level 1: paragraph:
item-72 at level 2: text: Yes
item-73 at level 1: text:
item-74 at level 1: text:
item-75 at level 1: section: group textbox
item-76 at level 2: paragraph: Case closed.
item-77 at level 2: paragraph:
item-78 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
item-79 at level 1: paragraph:
item-76 at level 2: text: Case closed.
item-77 at level 2: text:
item-78 at level 2: text: The Health Bureau will carry out ... ters for Disease Control if necessary.
item-79 at level 1: text:
item-80 at level 1: section: group textbox
item-81 at level 2: paragraph: No
item-82 at level 1: paragraph:
item-83 at level 1: paragraph:
item-84 at level 1: paragraph:
item-81 at level 2: text: No
item-82 at level 1: text:
item-83 at level 1: text:
item-84 at level 1: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "textbox",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -491,7 +491,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
"text": "Chiayi County Shuishang Township Nanjing Elementary School Affiliated Kindergarten",
@@ -510,7 +510,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
"text": "Infectious Disease Reporting Procedure for the 113th Academic Year Kindergarten",
@@ -529,7 +529,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -541,7 +541,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Student falls ill",
"text": "Student falls ill",
@@ -560,7 +560,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -593,7 +593,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -605,7 +605,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -617,7 +617,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
"text": "If a caregiver suspects that within one week, a fifth of the class (for classes with more than 15 students) or more than three students (for classes with 15 or fewer students)\nshow the same suggested reportable symptoms",
@@ -636,7 +636,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -648,7 +648,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -660,7 +660,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -672,7 +672,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -684,7 +684,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -703,7 +703,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -715,7 +715,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -769,7 +769,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -781,7 +781,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -793,7 +793,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -805,7 +805,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -817,7 +817,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -829,7 +829,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -841,7 +841,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Health Bureau:",
"text": "Health Bureau:",
@@ -860,7 +860,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
"text": "Upon receiving a report from the kindergarten, conduct a preliminary assessment of the case, and depending on the situation and type of illness, carry out an epidemiological investigation and report to the Centers for Disease Control.",
@@ -921,7 +921,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -933,7 +933,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -945,7 +945,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
"text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.",
@@ -964,7 +964,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -976,7 +976,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -988,7 +988,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1000,7 +1000,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1012,7 +1012,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1024,7 +1024,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1036,7 +1036,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1048,7 +1048,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "The Health Bureau will handle",
"text": "The Health Bureau will handle",
@@ -1067,7 +1067,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "reporting and specimen collection",
"text": "reporting and specimen collection",
@@ -1086,7 +1086,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": ".",
"text": ".",
@@ -1105,7 +1105,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1117,7 +1117,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1129,7 +1129,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1141,7 +1141,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1153,7 +1153,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Whether the epidemic has eased.",
"text": "Whether the epidemic has eased.",
@@ -1172,7 +1172,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1184,7 +1184,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1196,7 +1196,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Whether the test results are positive for a legally designated infectious disease.",
"text": "Whether the test results are positive for a legally designated infectious disease.",
@@ -1215,7 +1215,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "No",
"text": "No",
@@ -1234,7 +1234,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1246,7 +1246,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1258,7 +1258,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -1277,7 +1277,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1289,7 +1289,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Yes",
"text": "Yes",
@@ -1308,7 +1308,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1320,7 +1320,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1332,7 +1332,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Case closed.",
"text": "Case closed.",
@@ -1351,7 +1351,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1363,7 +1363,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
"text": "The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.",
@@ -1382,7 +1382,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1394,7 +1394,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "No",
"text": "No",
@@ -1413,7 +1413,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1425,7 +1425,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1437,7 +1437,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,18 +1,18 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: italic
item-2 at level 1: paragraph: bold
item-3 at level 1: paragraph: underline
item-4 at level 1: paragraph: hyperlink
item-5 at level 1: paragraph: italic and bold hyperlink
item-1 at level 1: text: italic
item-2 at level 1: text: bold
item-3 at level 1: text: underline
item-4 at level 1: text: hyperlink
item-5 at level 1: text: italic and bold hyperlink
item-6 at level 1: inline: group group
item-7 at level 2: paragraph: Normal
item-8 at level 2: paragraph: italic
item-9 at level 2: paragraph: bold
item-10 at level 2: paragraph: underline
item-11 at level 2: paragraph: and
item-12 at level 2: paragraph: hyperlink
item-13 at level 2: paragraph: on the same line
item-14 at level 1: paragraph:
item-7 at level 2: text: Normal
item-8 at level 2: text: italic
item-9 at level 2: text: bold
item-10 at level 2: text: underline
item-11 at level 2: text: and
item-12 at level 2: text: hyperlink
item-13 at level 2: text: on the same line
item-14 at level 1: text:
item-15 at level 1: list: group list
item-16 at level 2: list_item: Italic bullet 1
item-17 at level 2: list_item: Bold bullet 2
@@ -29,4 +29,4 @@ item-0 at level 0: unspecified: group _root_
item-28 at level 5: text: Nested
item-29 at level 5: text: italic
item-30 at level 5: text: bold
item-31 at level 1: paragraph:
item-31 at level 1: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "unit_test_formatting",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -174,7 +174,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "italic",
"text": "italic",
@@ -193,7 +193,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "bold",
"text": "bold",
@@ -212,7 +212,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "underline",
"text": "underline",
@@ -231,7 +231,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "hyperlink",
"text": "hyperlink",
@@ -251,7 +251,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "italic and bold hyperlink",
"text": "italic and bold hyperlink",
@@ -271,7 +271,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Normal",
"text": "Normal",
@@ -290,7 +290,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "italic",
"text": "italic",
@@ -309,7 +309,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "bold",
"text": "bold",
@@ -328,7 +328,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "underline",
"text": "underline",
@@ -347,7 +347,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "and",
"text": "and",
@@ -366,7 +366,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "hyperlink",
"text": "hyperlink",
@@ -386,7 +386,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "on the same line",
"text": "on the same line",
@@ -405,7 +405,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -649,7 +649,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,48 +1,48 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: title: Test Document
item-2 at level 2: paragraph:
item-2 at level 2: text:
item-3 at level 2: section_header: Section 1
item-4 at level 3: paragraph:
item-5 at level 3: paragraph: Paragraph 1.1
item-6 at level 3: paragraph:
item-7 at level 3: paragraph: Paragraph 1.2
item-8 at level 3: paragraph:
item-4 at level 3: text:
item-5 at level 3: text: Paragraph 1.1
item-6 at level 3: text:
item-7 at level 3: text: Paragraph 1.2
item-8 at level 3: text:
item-9 at level 3: section_header: Section 1.1
item-10 at level 4: paragraph:
item-11 at level 4: paragraph: Paragraph 1.1.1
item-12 at level 4: paragraph:
item-13 at level 4: paragraph: Paragraph 1.1.2
item-14 at level 4: paragraph:
item-10 at level 4: text:
item-11 at level 4: text: Paragraph 1.1.1
item-12 at level 4: text:
item-13 at level 4: text: Paragraph 1.1.2
item-14 at level 4: text:
item-15 at level 3: section_header: Section 1.2
item-16 at level 4: paragraph:
item-17 at level 4: paragraph: Paragraph 1.1.1
item-18 at level 4: paragraph:
item-19 at level 4: paragraph: Paragraph 1.1.2
item-20 at level 4: paragraph:
item-16 at level 4: text:
item-17 at level 4: text: Paragraph 1.1.1
item-18 at level 4: text:
item-19 at level 4: text: Paragraph 1.1.2
item-20 at level 4: text:
item-21 at level 4: section_header: Section 1.2.3
item-22 at level 5: paragraph:
item-23 at level 5: paragraph: Paragraph 1.2.3.1
item-24 at level 5: paragraph:
item-25 at level 5: paragraph: Paragraph 1.2.3.1
item-26 at level 5: paragraph:
item-27 at level 5: paragraph:
item-22 at level 5: text:
item-23 at level 5: text: Paragraph 1.2.3.1
item-24 at level 5: text:
item-25 at level 5: text: Paragraph 1.2.3.1
item-26 at level 5: text:
item-27 at level 5: text:
item-28 at level 2: section_header: Section 2
item-29 at level 3: paragraph:
item-30 at level 3: paragraph: Paragraph 2.1
item-31 at level 3: paragraph:
item-32 at level 3: paragraph: Paragraph 2.2
item-33 at level 3: paragraph:
item-29 at level 3: text:
item-30 at level 3: text: Paragraph 2.1
item-31 at level 3: text:
item-32 at level 3: text: Paragraph 2.2
item-33 at level 3: text:
item-34 at level 3: section: group header-2
item-35 at level 4: section_header: Section 2.1.1
item-36 at level 5: paragraph:
item-37 at level 5: paragraph: Paragraph 2.1.1.1
item-38 at level 5: paragraph:
item-39 at level 5: paragraph: Paragraph 2.1.1.1
item-40 at level 5: paragraph:
item-36 at level 5: text:
item-37 at level 5: text: Paragraph 2.1.1.1
item-38 at level 5: text:
item-39 at level 5: text: Paragraph 2.1.1.1
item-40 at level 5: text:
item-41 at level 3: section_header: Section 2.1
item-42 at level 4: paragraph:
item-43 at level 4: paragraph: Paragraph 2.1.1
item-44 at level 4: paragraph:
item-45 at level 4: paragraph: Paragraph 2.1.2
item-46 at level 4: paragraph:
item-47 at level 4: paragraph:
item-42 at level 4: text:
item-43 at level 4: text: Paragraph 2.1.1
item-44 at level 4: text:
item-45 at level 4: text: Paragraph 2.1.2
item-46 at level 4: text:
item-47 at level 4: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "unit_test_headers",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -71,7 +71,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -118,7 +118,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -130,7 +130,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1",
"text": "Paragraph 1.1",
@@ -149,7 +149,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -161,7 +161,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2",
"text": "Paragraph 1.2",
@@ -180,7 +180,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -221,7 +221,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -233,7 +233,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1",
@@ -252,7 +252,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -264,7 +264,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2",
@@ -283,7 +283,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -327,7 +327,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -339,7 +339,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1",
@@ -358,7 +358,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -370,7 +370,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2",
@@ -389,7 +389,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -433,7 +433,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -445,7 +445,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1",
@@ -464,7 +464,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -476,7 +476,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1",
@@ -495,7 +495,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -507,7 +507,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -554,7 +554,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -566,7 +566,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1",
"text": "Paragraph 2.1",
@@ -585,7 +585,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -597,7 +597,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.2",
"text": "Paragraph 2.2",
@@ -616,7 +616,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -657,7 +657,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -669,7 +669,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1",
@@ -688,7 +688,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -700,7 +700,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1",
@@ -719,7 +719,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -763,7 +763,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -775,7 +775,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1",
@@ -794,7 +794,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -806,7 +806,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2",
@@ -825,7 +825,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -837,7 +837,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,52 +1,52 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: title: Test Document
item-2 at level 2: paragraph:
item-2 at level 2: text:
item-3 at level 2: section_header: 1 Section 1
item-4 at level 1: paragraph:
item-5 at level 1: paragraph: Paragraph 1.1
item-6 at level 1: paragraph:
item-7 at level 1: paragraph: Paragraph 1.2
item-8 at level 1: paragraph:
item-4 at level 1: text:
item-5 at level 1: text: Paragraph 1.1
item-6 at level 1: text:
item-7 at level 1: text: Paragraph 1.2
item-8 at level 1: text:
item-9 at level 1: section: group header-0
item-10 at level 2: section: group header-1
item-11 at level 3: section_header: 1.1 Section 1.1
item-12 at level 4: paragraph:
item-13 at level 4: paragraph: Paragraph 1.1.1
item-14 at level 4: paragraph:
item-15 at level 4: paragraph: Paragraph 1.1.2
item-16 at level 4: paragraph:
item-12 at level 4: text:
item-13 at level 4: text: Paragraph 1.1.1
item-14 at level 4: text:
item-15 at level 4: text: Paragraph 1.1.2
item-16 at level 4: text:
item-17 at level 3: section_header: 1.2 Section 1.2
item-18 at level 4: paragraph:
item-19 at level 4: paragraph: Paragraph 1.1.1
item-20 at level 4: paragraph:
item-21 at level 4: paragraph: Paragraph 1.1.2
item-22 at level 4: paragraph:
item-18 at level 4: text:
item-19 at level 4: text: Paragraph 1.1.1
item-20 at level 4: text:
item-21 at level 4: text: Paragraph 1.1.2
item-22 at level 4: text:
item-23 at level 4: section_header: 1.2.1 Section 1.2.3
item-24 at level 5: paragraph:
item-25 at level 5: paragraph: Paragraph 1.2.3.1
item-26 at level 5: paragraph:
item-27 at level 5: paragraph: Paragraph 1.2.3.1
item-28 at level 5: paragraph:
item-29 at level 5: paragraph:
item-24 at level 5: text:
item-25 at level 5: text: Paragraph 1.2.3.1
item-26 at level 5: text:
item-27 at level 5: text: Paragraph 1.2.3.1
item-28 at level 5: text:
item-29 at level 5: text:
item-30 at level 2: section_header: 2 Section 2
item-31 at level 1: paragraph:
item-32 at level 1: paragraph: Paragraph 2.1
item-33 at level 1: paragraph:
item-34 at level 1: paragraph: Paragraph 2.2
item-35 at level 1: paragraph:
item-31 at level 1: text:
item-32 at level 1: text: Paragraph 2.1
item-33 at level 1: text:
item-34 at level 1: text: Paragraph 2.2
item-35 at level 1: text:
item-36 at level 1: section: group header-0
item-37 at level 2: section: group header-1
item-38 at level 3: section: group header-2
item-39 at level 4: section_header: 2.1.1 Section 2.1.1
item-40 at level 5: paragraph:
item-41 at level 5: paragraph: Paragraph 2.1.1.1
item-42 at level 5: paragraph:
item-43 at level 5: paragraph: Paragraph 2.1.1.1
item-44 at level 5: paragraph:
item-40 at level 5: text:
item-41 at level 5: text: Paragraph 2.1.1.1
item-42 at level 5: text:
item-43 at level 5: text: Paragraph 2.1.1.1
item-44 at level 5: text:
item-45 at level 3: section_header: 2.2 Section 2.1
item-46 at level 4: paragraph:
item-47 at level 4: paragraph: Paragraph 2.1.1
item-48 at level 4: paragraph:
item-49 at level 4: paragraph: Paragraph 2.1.2
item-50 at level 4: paragraph:
item-51 at level 4: paragraph:
item-46 at level 4: text:
item-47 at level 4: text: Paragraph 2.1.1
item-48 at level 4: text:
item-49 at level 4: text: Paragraph 2.1.2
item-50 at level 4: text:
item-51 at level 4: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "unit_test_headers_numbered",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -169,7 +169,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -194,7 +194,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -206,7 +206,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1",
"text": "Paragraph 1.1",
@@ -225,7 +225,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -237,7 +237,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2",
"text": "Paragraph 1.2",
@@ -256,7 +256,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -297,7 +297,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -309,7 +309,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1",
@@ -328,7 +328,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -340,7 +340,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2",
@@ -359,7 +359,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -403,7 +403,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -415,7 +415,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1",
@@ -434,7 +434,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -446,7 +446,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2",
@@ -465,7 +465,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -509,7 +509,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -521,7 +521,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1",
@@ -540,7 +540,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -552,7 +552,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1",
@@ -571,7 +571,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -583,7 +583,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -608,7 +608,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -620,7 +620,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1",
"text": "Paragraph 2.1",
@@ -639,7 +639,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -651,7 +651,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.2",
"text": "Paragraph 2.2",
@@ -670,7 +670,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -711,7 +711,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -723,7 +723,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1",
@@ -742,7 +742,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -754,7 +754,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1",
@@ -773,7 +773,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -817,7 +817,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -829,7 +829,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1",
@@ -848,7 +848,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -860,7 +860,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2",
@@ -879,7 +879,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -891,7 +891,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,25 +1,25 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: section: group header-0
item-2 at level 2: section_header: Test Document
item-3 at level 3: paragraph:
item-4 at level 3: paragraph:
item-5 at level 3: paragraph: Paragraph 2.1.1
item-6 at level 3: paragraph:
item-7 at level 3: paragraph: Paragraph 2.1.2
item-8 at level 3: paragraph:
item-3 at level 3: text:
item-4 at level 3: text:
item-5 at level 3: text: Paragraph 2.1.1
item-6 at level 3: text:
item-7 at level 3: text: Paragraph 2.1.2
item-8 at level 3: text:
item-9 at level 3: section: group header-2
item-10 at level 4: section_header: Test 1:
item-11 at level 5: list: group list
item-12 at level 6: list_item: List item 1
item-13 at level 6: list_item: List item 2
item-14 at level 6: list_item: List item 3
item-15 at level 5: paragraph:
item-15 at level 5: text:
item-16 at level 4: section_header: Test 2:
item-17 at level 5: list: group list
item-18 at level 6: list_item: List item a
item-19 at level 6: list_item: List item b
item-20 at level 6: list_item: List item c
item-21 at level 5: paragraph:
item-21 at level 5: text:
item-22 at level 4: section_header: Test 3:
item-23 at level 5: list: group list
item-24 at level 6: list_item: List item 1
@@ -29,14 +29,14 @@ item-0 at level 0: unspecified: group _root_
item-28 at level 7: list_item: List item 1.2
item-29 at level 7: list_item: List item 1.3
item-30 at level 6: list_item: List item 3
item-31 at level 5: paragraph:
item-31 at level 5: text:
item-32 at level 4: section_header: Test 4:
item-33 at level 5: list: group list
item-34 at level 6: list_item: List item 1
item-35 at level 6: list: group list
item-36 at level 7: list_item: List item 1.1
item-37 at level 6: list_item: List item 2
item-38 at level 5: paragraph:
item-38 at level 5: text:
item-39 at level 4: section_header: Test 5:
item-40 at level 5: list: group list
item-41 at level 6: list_item: List item 1
@@ -45,7 +45,7 @@ item-0 at level 0: unspecified: group _root_
item-44 at level 7: list: group list
item-45 at level 8: list_item: List item 1.1.1
item-46 at level 6: list_item: List item 3
item-47 at level 5: paragraph:
item-47 at level 5: text:
item-48 at level 4: section_header: Test 6:
item-49 at level 5: list: group list
item-50 at level 6: list_item: List item 1
@@ -56,6 +56,6 @@ item-0 at level 0: unspecified: group _root_
item-55 at level 7: list: group list
item-56 at level 8: list_item: List item 1.2.1
item-57 at level 6: list_item: List item 3
item-58 at level 5: paragraph:
item-59 at level 5: paragraph:
item-60 at level 5: paragraph:
item-58 at level 5: text:
item-59 at level 5: text:
item-60 at level 5: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "unit_test_lists",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -338,7 +338,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -350,7 +350,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -362,7 +362,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1",
@@ -381,7 +381,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -393,7 +393,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2",
@@ -412,7 +412,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -507,7 +507,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -602,7 +602,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -760,7 +760,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -855,7 +855,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -971,7 +971,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1135,7 +1135,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1147,7 +1147,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -1159,7 +1159,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,16 +1,16 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: Transcript
item-2 at level 1: paragraph: February 20, 2025, 8:32PM
item-1 at level 1: text: Transcript
item-2 at level 1: text: February 20, 2025, 8:32PM
item-3 at level 1: picture
item-4 at level 1: inline: group group
item-5 at level 2: paragraph: This is test 1
item-6 at level 2: paragraph: 0:08
item-5 at level 2: text: This is test 1
item-6 at level 2: text: 0:08
Correct, he is not.
item-7 at level 1: paragraph:
item-7 at level 1: text:
item-8 at level 1: picture
item-9 at level 1: inline: group group
item-10 at level 2: paragraph: This is test 2
item-11 at level 2: paragraph: 0:16
item-10 at level 2: text: This is test 2
item-11 at level 2: text: 0:16
Yeah, exactly.
item-12 at level 1: paragraph:
item-13 at level 1: paragraph:
item-12 at level 1: text:
item-13 at level 1: text:

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "word_image_anchors",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -93,7 +93,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Transcript",
"text": "Transcript",
@@ -112,7 +112,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "February 20, 2025, 8:32PM",
"text": "February 20, 2025, 8:32PM",
@@ -131,7 +131,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is test 1",
"text": "This is test 1",
@@ -150,7 +150,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "0:08\nCorrect, he is not.",
"text": "0:08\nCorrect, he is not.",
@@ -169,7 +169,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -181,7 +181,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "This is test 2",
"text": "This is test 2",
@@ -200,7 +200,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "0:16\nYeah, exactly.",
"text": "0:16\nYeah, exactly.",
@@ -219,7 +219,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -231,7 +231,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""

View File

@@ -1,28 +1,28 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: paragraph: Summer activities
item-1 at level 1: text: Summer activities
item-2 at level 1: title: Swimming in the lake
item-3 at level 2: paragraph: Duck
item-3 at level 2: text: Duck
item-4 at level 2: picture
item-5 at level 2: paragraph: Figure 1: This is a cute duckling
item-5 at level 2: text: Figure 1: This is a cute duckling
item-6 at level 2: section_header: Lets swim!
item-7 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown:
item-7 at level 3: text: To get started with swimming, fi ... down in a water and try not to drown:
item-8 at level 3: list: group list
item-9 at level 4: list_item: You can relax and look around
item-10 at level 4: list_item: Paddle about
item-11 at level 4: list_item: Enjoy summer warmth
item-12 at level 3: paragraph: Also, dont forget:
item-12 at level 3: text: Also, dont forget:
item-13 at level 3: list: group list
item-14 at level 4: list_item: Wear sunglasses
item-15 at level 4: list_item: Dont forget to drink water
item-16 at level 4: list_item: Use sun cream
item-17 at level 3: paragraph: Hmm, what else…
item-17 at level 3: text: Hmm, what else…
item-18 at level 3: section_header: Lets eat
item-19 at level 4: paragraph: After we had a good day of swimm ... , its important to eat something nice
item-20 at level 4: paragraph: I like to eat leaves
item-21 at level 4: paragraph: Here are some interesting things a respectful duck could eat:
item-19 at level 4: text: After we had a good day of swimm ... , its important to eat something nice
item-20 at level 4: text: I like to eat leaves
item-21 at level 4: text: Here are some interesting things a respectful duck could eat:
item-22 at level 4: table with [4x3]
item-23 at level 4: paragraph:
item-24 at level 4: paragraph: And lets add another list in the end:
item-23 at level 4: text:
item-24 at level 4: text: And lets add another list in the end:
item-25 at level 4: list: group list
item-26 at level 5: list_item: Leaves
item-27 at level 5: list_item: Berries

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.6.0",
"version": "1.7.0",
"name": "word_sample",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@@ -98,7 +98,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Summer activities",
"text": "Summer activities",
@@ -142,7 +142,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Duck",
"text": "Duck",
@@ -161,7 +161,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Figure 1: This is a cute duckling",
"text": "Figure 1: This is a cute duckling",
@@ -212,7 +212,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "To get started with swimming, first lay down in a water and try not to drown:",
"text": "To get started with swimming, first lay down in a water and try not to drown:",
@@ -294,7 +294,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Also, dont forget:",
"text": "Also, dont forget:",
@@ -376,7 +376,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Hmm, what else…",
"text": "Hmm, what else…",
@@ -430,7 +430,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "After we had a good day of swimming in the lake, its important to eat something nice",
"text": "After we had a good day of swimming in the lake, its important to eat something nice",
@@ -449,7 +449,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "I like to eat leaves",
"text": "I like to eat leaves",
@@ -468,7 +468,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "Here are some interesting things a respectful duck could eat:",
"text": "Here are some interesting things a respectful duck could eat:",
@@ -487,7 +487,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "",
"text": ""
@@ -499,7 +499,7 @@
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"label": "text",
"prov": [],
"orig": "And lets add another list in the end:",
"text": "And lets add another list in the end:",
@@ -625,7 +625,8 @@
"text": "",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -637,7 +638,8 @@
"text": "Food",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -649,7 +651,8 @@
"text": "Calories per portion",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -661,7 +664,8 @@
"text": "Leaves",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -673,7 +677,8 @@
"text": "Ash, Elm, Maple",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -685,7 +690,8 @@
"text": "50",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -697,7 +703,8 @@
"text": "Berries",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -709,7 +716,8 @@
"text": "Blueberry, Strawberry, Cranberry",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -721,7 +729,8 @@
"text": "150",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -733,7 +742,8 @@
"text": "Grain",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -745,7 +755,8 @@
"text": "Corn, Buckwheat, Barley",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -757,7 +768,8 @@
"text": "200",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
"num_rows": 4,
@@ -774,7 +786,8 @@
"text": "",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -786,7 +799,8 @@
"text": "Food",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -798,7 +812,8 @@
"text": "Calories per portion",
"column_header": true,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -812,7 +827,8 @@
"text": "Leaves",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -824,7 +840,8 @@
"text": "Ash, Elm, Maple",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -836,7 +853,8 @@
"text": "50",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -850,7 +868,8 @@
"text": "Berries",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -862,7 +881,8 @@
"text": "Blueberry, Strawberry, Cranberry",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -874,7 +894,8 @@
"text": "150",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
],
[
@@ -888,7 +909,8 @@
"text": "Grain",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -900,7 +922,8 @@
"text": "Corn, Buckwheat, Barley",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
},
{
"row_span": 1,
@@ -912,7 +935,8 @@
"text": "200",
"column_header": false,
"row_header": false,
"row_section": false
"row_section": false,
"fillable": false
}
]
]

View File

@@ -1,19 +1,19 @@
item-0 at level 0: unspecified: group _root_
item-1 at level 1: section: group header-0
item-2 at level 2: section_header: Test with tables
item-3 at level 3: paragraph: A uniform table
item-3 at level 3: text: A uniform table
item-4 at level 3: table with [3x3]
item-5 at level 3: paragraph:
item-6 at level 3: paragraph: A non-uniform table with horizontal spans
item-5 at level 3: text:
item-6 at level 3: text: A non-uniform table with horizontal spans
item-7 at level 3: table with [3x3]
item-8 at level 3: paragraph:
item-9 at level 3: paragraph: A non-uniform table with horizontal spans in inner columns
item-8 at level 3: text:
item-9 at level 3: text: A non-uniform table with horizontal spans in inner columns
item-10 at level 3: table with [3x4]
item-11 at level 3: paragraph:
item-12 at level 3: paragraph: A non-uniform table with vertical spans
item-11 at level 3: text:
item-12 at level 3: text: A non-uniform table with vertical spans
item-13 at level 3: table with [5x3]
item-14 at level 3: paragraph:
item-15 at level 3: paragraph: A non-uniform table with all kinds of spans and empty cells
item-14 at level 3: text:
item-15 at level 3: text: A non-uniform table with all kinds of spans and empty cells
item-16 at level 3: table with [9x5]
item-17 at level 3: paragraph:
item-18 at level 3: paragraph:
item-17 at level 3: text:
item-18 at level 3: text:

File diff suppressed because it is too large Load Diff