diff --git a/tests/data/groundtruth/docling_v2/equations.docx.itxt b/tests/data/groundtruth/docling_v2/equations.docx.itxt index c28443a9..b6fc5d14 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.itxt +++ b/tests/data/groundtruth/docling_v2/equations.docx.itxt @@ -1,31 +1,31 @@ item-0 at level 0: unspecified: group _root_ item-1 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-2 at level 1: paragraph: - item-3 at level 1: paragraph: $a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$ + item-3 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23 item-4 at level 1: paragraph: And that is an equation by itself. Cheers! item-5 at level 1: paragraph: item-6 at level 1: paragraph: This is another equation: - item-7 at level 1: paragraph: $f\left(x\right)=a_{0}+\sum_{n=1 ... )+b_{n}\sin(\frac{n \pi x}{L})\right)$ + item-7 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right) item-8 at level 1: paragraph: item-9 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text. item-10 at level 1: paragraph: item-11 at level 1: paragraph: item-12 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-13 at level 1: paragraph: - item-14 at level 1: paragraph: $\left(x+a\right)^{n}=\sum_{k=0} ... c{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$ + item-14 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k} item-15 at level 1: paragraph: item-16 at level 1: paragraph: And that is an equation by itself. Cheers! item-17 at level 1: paragraph: item-18 at level 1: paragraph: This is another equation: item-19 at level 1: paragraph: - item-20 at level 1: paragraph: $\left(1+x\right)^{n}=1+\frac{nx ... t)x^{2}}{2!}+ \text{ \textellipsis } $ + item-20 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ht)x^{2}}{2!}+ \text{ \textellipsis } item-21 at level 1: paragraph: item-22 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text. item-23 at level 1: paragraph: item-24 at level 1: paragraph: item-25 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-26 at level 1: paragraph: - item-27 at level 1: paragraph: $e^{x}=1+\frac{x}{1!}+\frac{x^{2 ... ellipsis } , - \infty < x < \infty $ + item-27 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... tellipsis } , - \infty < x < \infty item-28 at level 1: paragraph: item-29 at level 1: paragraph: And that is an equation by itself. Cheers! item-30 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/equations.docx.json b/tests/data/groundtruth/docling_v2/equations.docx.json index 2f6cb7ca..1905f9ca 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.json +++ b/tests/data/groundtruth/docling_v2/equations.docx.json @@ -140,10 +140,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23$", - "text": "$a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23$" + "orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23", + "text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23" }, { "self_ref": "#/texts/3", @@ -184,10 +184,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)$", - "text": "$f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)$" + "orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)", + "text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)" }, { "self_ref": "#/texts/7", @@ -261,10 +261,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}$", - "text": "$\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}$" + "orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}", + "text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}" }, { "self_ref": "#/texts/14", @@ -327,10 +327,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } $", - "text": "$\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } $" + "orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } ", + "text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } " }, { "self_ref": "#/texts/20", @@ -404,10 +404,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty $", - "text": "$e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty $" + "orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty ", + "text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty " }, { "self_ref": "#/texts/27", diff --git a/tests/data/groundtruth/docling_v2/equations.docx.md b/tests/data/groundtruth/docling_v2/equations.docx.md index 7364d129..bb023bbd 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.md +++ b/tests/data/groundtruth/docling_v2/equations.docx.md @@ -1,29 +1,29 @@ This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$ +$$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$$ And that is an equation by itself. Cheers! This is another equation: -$f\left(x\right)=a\_{0}+\sum\_{n=1}^{ \infty }\left(a\_{n}\cos(\frac{n \pi x}{L})+b\_{n}\sin(\frac{n \pi x}{L})\right)$ +$$f\left(x\right)=a_{0}+\sum_{n=1}^{ \infty }\left(a_{n}\cos(\frac{n \pi x}{L})+b_{n}\sin(\frac{n \pi x}{L})\right)$$ This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$\left(x+a\right)^{n}=\sum\_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$ +$$\left(x+a\right)^{n}=\sum_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$$ And that is an equation by itself. Cheers! This is another equation: -$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis } $ +$$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis } $$ This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty $ +$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty $$ And that is an equation by itself. Cheers! \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt index ce60ad26..b0325510 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt @@ -3,7 +3,7 @@ item-0 at level 0: unspecified: group _root_ item-2 at level 1: title: Swimming in the lake item-3 at level 2: paragraph: Duck item-4 at level 2: picture - item-5 at level 2: paragraph: Figure 1: This is a cute duckling + item-5 at level 2: text: Figure 1: This is a cute duckling item-6 at level 2: section_header: Let’s swim! item-7 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown: item-8 at level 3: list: group list diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.json b/tests/data/groundtruth/docling_v2/word_sample.docx.json index 8c6e6298..44b4bd61 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.json +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.json @@ -138,7 +138,7 @@ "$ref": "#/texts/1" }, "children": [], - "label": "paragraph", + "label": "text", "prov": [], "orig": "Figure 1: This is a cute duckling", "text": "Figure 1: This is a cute duckling"