fix(docx): parse integrals as n-ary objects without chr element (#2712)

Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-12-03 11:25:52 +01:00
committed by GitHub
parent f80c903c24
commit c97715f5fd
6 changed files with 248 additions and 5 deletions

View File

@@ -65,6 +65,11 @@ CHR_BO = {
"\u2210": "\\coprod", "\u2210": "\\coprod",
"\u2211": "\\sum", "\u2211": "\\sum",
"\u222b": "\\int", "\u222b": "\\int",
"\u222c": "\\iint",
"\u222d": "\\iiint",
"\u222e": "\\oint",
"\u222f": "\\oiint",
"\u2230": "\\oiiint",
"\u22c0": "\\bigwedge", "\u22c0": "\\bigwedge",
"\u22c1": "\\bigvee", "\u22c1": "\\bigvee",
"\u22c2": "\\bigcap", "\u22c2": "\\bigcap",

View File

@@ -381,7 +381,8 @@ class oMath2Latex(Tag2Method):
bo = "" bo = ""
for stag, t, e in self.process_children_list(elm): for stag, t, e in self.process_children_list(elm):
if stag == "naryPr": if stag == "naryPr":
bo = get_val(t.chr, store=CHR_BO) # if <m:naryPr> contains no <m:chr>, the n-ary represents an integral
bo = get_val(t.chr, default="\\int", store=CHR_BO)
else: else:
res.append(t) res.append(t)
return bo + BLANK.join(res) return bo + BLANK.join(res)

Binary file not shown.

View File

@@ -38,3 +38,16 @@ item-0 at level 0: unspecified: group _root_
item-37 at level 1: text: item-37 at level 1: text:
item-38 at level 1: text: And that is an equation by itself. Cheers! item-38 at level 1: text: And that is an equation by itself. Cheers!
item-39 at level 1: text: item-39 at level 1: text:
item-40 at level 1: text: Large operators and integrals ar ... sented with n-ary objects in OMML XML:
item-41 at level 1: text:
item-42 at level 1: formula: \sum_{0}^{2}x
item-43 at level 1: formula: \bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)
item-44 at level 1: formula: \prod_{k=1}^{n}A_{k}
item-45 at level 1: formula: \bigwedge_{}^{}x
item-46 at level 1: formula: \int_{}^{}(2x+1)dx
item-47 at level 1: formula: \iint_{0}^{1}xdx
item-48 at level 1: formula: \iiint_{}^{}ydy
item-49 at level 1: formula: \oint_{}^{}\frac{dy}{dx}
item-50 at level 1: formula: \oiint_{0}^{2 \pi }idt
item-51 at level 1: formula: \oiiint_{C}^{}\frac{1}{z}dz
item-52 at level 1: text:

View File

@@ -1,10 +1,10 @@
{ {
"schema_name": "DoclingDocument", "schema_name": "DoclingDocument",
"version": "1.7.0", "version": "1.8.0",
"name": "equations", "name": "equations",
"origin": { "origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 11121138535595486899, "binary_hash": 8638432756089077257,
"filename": "equations.docx" "filename": "equations.docx"
}, },
"furniture": { "furniture": {
@@ -106,6 +106,45 @@
}, },
{ {
"$ref": "#/texts/35" "$ref": "#/texts/35"
},
{
"$ref": "#/texts/36"
},
{
"$ref": "#/texts/37"
},
{
"$ref": "#/texts/38"
},
{
"$ref": "#/texts/39"
},
{
"$ref": "#/texts/40"
},
{
"$ref": "#/texts/41"
},
{
"$ref": "#/texts/42"
},
{
"$ref": "#/texts/43"
},
{
"$ref": "#/texts/44"
},
{
"$ref": "#/texts/45"
},
{
"$ref": "#/texts/46"
},
{
"$ref": "#/texts/47"
},
{
"$ref": "#/texts/48"
} }
], ],
"content_layer": "body", "content_layer": "body",
@@ -655,6 +694,169 @@
"prov": [], "prov": [],
"orig": "", "orig": "",
"text": "" "text": ""
},
{
"self_ref": "#/texts/36",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [],
"orig": "Large operators and integrals are represented with n-ary objects in OMML XML:",
"text": "Large operators and integrals are represented with n-ary objects in OMML XML:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/37",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/38",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\sum_{0}^{2}x",
"text": "\\sum_{0}^{2}x"
},
{
"self_ref": "#/texts/39",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)",
"text": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)"
},
{
"self_ref": "#/texts/40",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\prod_{k=1}^{n}A_{k}",
"text": "\\prod_{k=1}^{n}A_{k}"
},
{
"self_ref": "#/texts/41",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\bigwedge_{}^{}x",
"text": "\\bigwedge_{}^{}x"
},
{
"self_ref": "#/texts/42",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\int_{}^{}(2x+1)dx",
"text": "\\int_{}^{}(2x+1)dx"
},
{
"self_ref": "#/texts/43",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\iint_{0}^{1}xdx",
"text": "\\iint_{0}^{1}xdx"
},
{
"self_ref": "#/texts/44",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\iiint_{}^{}ydy",
"text": "\\iiint_{}^{}ydy"
},
{
"self_ref": "#/texts/45",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\oint_{}^{}\\frac{dy}{dx}",
"text": "\\oint_{}^{}\\frac{dy}{dx}"
},
{
"self_ref": "#/texts/46",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\oiint_{0}^{2 \\pi }idt",
"text": "\\oiint_{0}^{2 \\pi }idt"
},
{
"self_ref": "#/texts/47",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\oiiint_{C}^{}\\frac{1}{z}dz",
"text": "\\oiiint_{C}^{}\\frac{1}{z}dz"
},
{
"self_ref": "#/texts/48",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [],
"orig": "",
"text": ""
} }
], ],
"pictures": [], "pictures": [],

View File

@@ -27,3 +27,25 @@ This is a word document and this is an inline equation: $A= \pi r^{2}$ . If ins
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$ $$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
And that is an equation by itself. Cheers! And that is an equation by itself. Cheers!
Large operators and integrals are represented with n-ary objects in OMML XML:
$$\sum_{0}^{2}x$$
$$\bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)$$
$$\prod_{k=1}^{n}A_{k}$$
$$\bigwedge_{}^{}x$$
$$\int_{}^{}(2x+1)dx$$
$$\iint_{0}^{1}xdx$$
$$\iiint_{}^{}ydy$$
$$\oint_{}^{}\frac{dy}{dx}$$
$$\oiint_{0}^{2 \pi }idt$$
$$\oiiint_{C}^{}\frac{1}{z}dz$$