From 97f444b11c2724d292c9f7444188c41a34b9e1ee Mon Sep 17 00:00:00 2001 From: Rafael Teixeira de Lima Date: Wed, 29 Jan 2025 16:06:33 +0100 Subject: [PATCH] Update test files Signed-off-by: Rafael Teixeira de Lima --- .../docling_v2/equations.docx.itxt | 10 +++---- .../docling_v2/equations.docx.json | 30 +++++++++---------- .../groundtruth/docling_v2/equations.docx.md | 10 +++---- .../docling_v2/word_sample.docx.itxt | 2 +- .../docling_v2/word_sample.docx.json | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/data/groundtruth/docling_v2/equations.docx.itxt b/tests/data/groundtruth/docling_v2/equations.docx.itxt index c28443a9..b6fc5d14 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.itxt +++ b/tests/data/groundtruth/docling_v2/equations.docx.itxt @@ -1,31 +1,31 @@ item-0 at level 0: unspecified: group _root_ item-1 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-2 at level 1: paragraph: - item-3 at level 1: paragraph: $a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$ + item-3 at level 1: formula: a^{2}+b^{2}=c^{2} \text{ \texttimes } 23 item-4 at level 1: paragraph: And that is an equation by itself. Cheers! item-5 at level 1: paragraph: item-6 at level 1: paragraph: This is another equation: - item-7 at level 1: paragraph: $f\left(x\right)=a_{0}+\sum_{n=1 ... )+b_{n}\sin(\frac{n \pi x}{L})\right)$ + item-7 at level 1: formula: f\left(x\right)=a_{0}+\sum_{n=1} ... })+b_{n}\sin(\frac{n \pi x}{L})\right) item-8 at level 1: paragraph: item-9 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text. item-10 at level 1: paragraph: item-11 at level 1: paragraph: item-12 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-13 at level 1: paragraph: - item-14 at level 1: paragraph: $\left(x+a\right)^{n}=\sum_{k=0} ... c{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$ + item-14 at level 1: formula: \left(x+a\right)^{n}=\sum_{k=0}^ ... ac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k} item-15 at level 1: paragraph: item-16 at level 1: paragraph: And that is an equation by itself. Cheers! item-17 at level 1: paragraph: item-18 at level 1: paragraph: This is another equation: item-19 at level 1: paragraph: - item-20 at level 1: paragraph: $\left(1+x\right)^{n}=1+\frac{nx ... t)x^{2}}{2!}+ \text{ \textellipsis } $ + item-20 at level 1: formula: \left(1+x\right)^{n}=1+\frac{nx} ... ht)x^{2}}{2!}+ \text{ \textellipsis } item-21 at level 1: paragraph: item-22 at level 1: paragraph: This is text. This is text. This ... s is text. This is text. This is text. item-23 at level 1: paragraph: item-24 at level 1: paragraph: item-25 at level 1: paragraph: This is a word document and this ... nt an equation by line, I can do this: item-26 at level 1: paragraph: - item-27 at level 1: paragraph: $e^{x}=1+\frac{x}{1!}+\frac{x^{2 ... ellipsis } , - \infty < x < \infty $ + item-27 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... tellipsis } , - \infty < x < \infty item-28 at level 1: paragraph: item-29 at level 1: paragraph: And that is an equation by itself. Cheers! item-30 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/equations.docx.json b/tests/data/groundtruth/docling_v2/equations.docx.json index 2f6cb7ca..1905f9ca 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.json +++ b/tests/data/groundtruth/docling_v2/equations.docx.json @@ -140,10 +140,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23$", - "text": "$a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23$" + "orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23", + "text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23" }, { "self_ref": "#/texts/3", @@ -184,10 +184,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)$", - "text": "$f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)$" + "orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)", + "text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)" }, { "self_ref": "#/texts/7", @@ -261,10 +261,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}$", - "text": "$\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}$" + "orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}", + "text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}" }, { "self_ref": "#/texts/14", @@ -327,10 +327,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } $", - "text": "$\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } $" + "orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } ", + "text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis } " }, { "self_ref": "#/texts/20", @@ -404,10 +404,10 @@ "$ref": "#/body" }, "children": [], - "label": "paragraph", + "label": "formula", "prov": [], - "orig": "$e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty $", - "text": "$e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty $" + "orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty ", + "text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty " }, { "self_ref": "#/texts/27", diff --git a/tests/data/groundtruth/docling_v2/equations.docx.md b/tests/data/groundtruth/docling_v2/equations.docx.md index 7364d129..bb023bbd 100644 --- a/tests/data/groundtruth/docling_v2/equations.docx.md +++ b/tests/data/groundtruth/docling_v2/equations.docx.md @@ -1,29 +1,29 @@ This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$ +$$a^{2}+b^{2}=c^{2} \text{ \texttimes } 23$$ And that is an equation by itself. Cheers! This is another equation: -$f\left(x\right)=a\_{0}+\sum\_{n=1}^{ \infty }\left(a\_{n}\cos(\frac{n \pi x}{L})+b\_{n}\sin(\frac{n \pi x}{L})\right)$ +$$f\left(x\right)=a_{0}+\sum_{n=1}^{ \infty }\left(a_{n}\cos(\frac{n \pi x}{L})+b_{n}\sin(\frac{n \pi x}{L})\right)$$ This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$\left(x+a\right)^{n}=\sum\_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$ +$$\left(x+a\right)^{n}=\sum_{k=0}^{n}\left(\genfrac{}{}{0pt}{}{n}{k}\right)x^{k}a^{n-k}$$ And that is an equation by itself. Cheers! This is another equation: -$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis } $ +$$\left(1+x\right)^{n}=1+\frac{nx}{1!}+\frac{n\left(n-1\right)x^{2}}{2!}+ \text{ \textellipsis } $$ This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is a word document and this is an inline equation: $A= \pi r^{2} $. If instead, I want an equation by line, I can do this: -$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty $ +$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty $$ And that is an equation by itself. Cheers! \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt index ce60ad26..b0325510 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.itxt +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.itxt @@ -3,7 +3,7 @@ item-0 at level 0: unspecified: group _root_ item-2 at level 1: title: Swimming in the lake item-3 at level 2: paragraph: Duck item-4 at level 2: picture - item-5 at level 2: paragraph: Figure 1: This is a cute duckling + item-5 at level 2: text: Figure 1: This is a cute duckling item-6 at level 2: section_header: Let’s swim! item-7 at level 3: paragraph: To get started with swimming, fi ... down in a water and try not to drown: item-8 at level 3: list: group list diff --git a/tests/data/groundtruth/docling_v2/word_sample.docx.json b/tests/data/groundtruth/docling_v2/word_sample.docx.json index 8c6e6298..44b4bd61 100644 --- a/tests/data/groundtruth/docling_v2/word_sample.docx.json +++ b/tests/data/groundtruth/docling_v2/word_sample.docx.json @@ -138,7 +138,7 @@ "$ref": "#/texts/1" }, "children": [], - "label": "paragraph", + "label": "text", "prov": [], "orig": "Figure 1: This is a cute duckling", "text": "Figure 1: This is a cute duckling"