docling/tests/data/groundtruth/docling_v2/equations.docx.json
Christoph Auer e34c0750a7
Some checks failed
Run Docs CI / build-docs (push) Failing after 1m26s
Run CI / code-checks (push) Failing after 6m40s
Reset all tests to use docling-parse v1 for now
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-03-14 16:39:16 +01:00

616 lines
15 KiB
JSON

{
"schema_name": "DoclingDocument",
"version": "1.3.0",
"name": "equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 11121138535595486899,
"filename": "equations.docx"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/groups/0"
},
{
"$ref": "#/texts/3"
},
{
"$ref": "#/texts/4"
},
{
"$ref": "#/texts/5"
},
{
"$ref": "#/texts/6"
},
{
"$ref": "#/texts/7"
},
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
},
{
"$ref": "#/texts/10"
},
{
"$ref": "#/texts/11"
},
{
"$ref": "#/texts/12"
},
{
"$ref": "#/groups/1"
},
{
"$ref": "#/texts/16"
},
{
"$ref": "#/texts/17"
},
{
"$ref": "#/texts/18"
},
{
"$ref": "#/texts/19"
},
{
"$ref": "#/texts/20"
},
{
"$ref": "#/texts/21"
},
{
"$ref": "#/texts/22"
},
{
"$ref": "#/texts/23"
},
{
"$ref": "#/texts/24"
},
{
"$ref": "#/texts/25"
},
{
"$ref": "#/texts/26"
},
{
"$ref": "#/texts/27"
},
{
"$ref": "#/groups/2"
},
{
"$ref": "#/texts/31"
},
{
"$ref": "#/texts/32"
},
{
"$ref": "#/texts/33"
},
{
"$ref": "#/texts/34"
},
{
"$ref": "#/texts/35"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [
{
"self_ref": "#/groups/0",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/0"
},
{
"$ref": "#/texts/1"
},
{
"$ref": "#/texts/2"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/1",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/13"
},
{
"$ref": "#/texts/14"
},
{
"$ref": "#/texts/15"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/2",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/28"
},
{
"$ref": "#/texts/29"
},
{
"$ref": "#/texts/30"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
}
],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/1",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/2",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/3",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/4",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23",
"text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23"
},
{
"self_ref": "#/texts/5",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/6",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/7",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)",
"text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)"
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/10",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
},
{
"self_ref": "#/texts/11",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/12",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/13",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/14",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/15",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/16",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/17",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}",
"text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}"
},
{
"self_ref": "#/texts/18",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/19",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/20",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/21",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
},
{
"self_ref": "#/texts/22",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/23",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }",
"text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }"
},
{
"self_ref": "#/texts/24",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/25",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
},
{
"self_ref": "#/texts/26",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/27",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/28",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/29",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2} ",
"text": "A= \\pi r^{2} "
},
{
"self_ref": "#/texts/30",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/31",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/32",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty",
"text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty"
},
{
"self_ref": "#/texts/33",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/34",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
},
{
"self_ref": "#/texts/35",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}