mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(docx): parse integrals as n-ary objects without chr element (#2712)
Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
f80c903c24
commit
c97715f5fd
@@ -65,6 +65,11 @@ CHR_BO = {
|
|||||||
"\u2210": "\\coprod",
|
"\u2210": "\\coprod",
|
||||||
"\u2211": "\\sum",
|
"\u2211": "\\sum",
|
||||||
"\u222b": "\\int",
|
"\u222b": "\\int",
|
||||||
|
"\u222c": "\\iint",
|
||||||
|
"\u222d": "\\iiint",
|
||||||
|
"\u222e": "\\oint",
|
||||||
|
"\u222f": "\\oiint",
|
||||||
|
"\u2230": "\\oiiint",
|
||||||
"\u22c0": "\\bigwedge",
|
"\u22c0": "\\bigwedge",
|
||||||
"\u22c1": "\\bigvee",
|
"\u22c1": "\\bigvee",
|
||||||
"\u22c2": "\\bigcap",
|
"\u22c2": "\\bigcap",
|
||||||
|
|||||||
@@ -381,7 +381,8 @@ class oMath2Latex(Tag2Method):
|
|||||||
bo = ""
|
bo = ""
|
||||||
for stag, t, e in self.process_children_list(elm):
|
for stag, t, e in self.process_children_list(elm):
|
||||||
if stag == "naryPr":
|
if stag == "naryPr":
|
||||||
bo = get_val(t.chr, store=CHR_BO)
|
# if <m:naryPr> contains no <m:chr>, the n-ary represents an integral
|
||||||
|
bo = get_val(t.chr, default="\\int", store=CHR_BO)
|
||||||
else:
|
else:
|
||||||
res.append(t)
|
res.append(t)
|
||||||
return bo + BLANK.join(res)
|
return bo + BLANK.join(res)
|
||||||
|
|||||||
BIN
tests/data/docx/equations.docx
vendored
BIN
tests/data/docx/equations.docx
vendored
Binary file not shown.
@@ -37,4 +37,17 @@ item-0 at level 0: unspecified: group _root_
|
|||||||
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
||||||
item-37 at level 1: text:
|
item-37 at level 1: text:
|
||||||
item-38 at level 1: text: And that is an equation by itself. Cheers!
|
item-38 at level 1: text: And that is an equation by itself. Cheers!
|
||||||
item-39 at level 1: text:
|
item-39 at level 1: text:
|
||||||
|
item-40 at level 1: text: Large operators and integrals ar ... sented with n-ary objects in OMML XML:
|
||||||
|
item-41 at level 1: text:
|
||||||
|
item-42 at level 1: formula: \sum_{0}^{2}x
|
||||||
|
item-43 at level 1: formula: \bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)
|
||||||
|
item-44 at level 1: formula: \prod_{k=1}^{n}A_{k}
|
||||||
|
item-45 at level 1: formula: \bigwedge_{}^{}x
|
||||||
|
item-46 at level 1: formula: \int_{}^{}(2x+1)dx
|
||||||
|
item-47 at level 1: formula: \iint_{0}^{1}xdx
|
||||||
|
item-48 at level 1: formula: \iiint_{}^{}ydy
|
||||||
|
item-49 at level 1: formula: \oint_{}^{}\frac{dy}{dx}
|
||||||
|
item-50 at level 1: formula: \oiint_{0}^{2 \pi }idt
|
||||||
|
item-51 at level 1: formula: \oiiint_{C}^{}\frac{1}{z}dz
|
||||||
|
item-52 at level 1: text:
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
{
|
{
|
||||||
"schema_name": "DoclingDocument",
|
"schema_name": "DoclingDocument",
|
||||||
"version": "1.7.0",
|
"version": "1.8.0",
|
||||||
"name": "equations",
|
"name": "equations",
|
||||||
"origin": {
|
"origin": {
|
||||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
"binary_hash": 11121138535595486899,
|
"binary_hash": 8638432756089077257,
|
||||||
"filename": "equations.docx"
|
"filename": "equations.docx"
|
||||||
},
|
},
|
||||||
"furniture": {
|
"furniture": {
|
||||||
@@ -106,6 +106,45 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/texts/35"
|
"$ref": "#/texts/35"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/36"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/37"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/38"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/39"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/40"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/41"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/42"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/43"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/44"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/45"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/46"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/47"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/48"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"content_layer": "body",
|
"content_layer": "body",
|
||||||
@@ -655,6 +694,169 @@
|
|||||||
"prov": [],
|
"prov": [],
|
||||||
"orig": "",
|
"orig": "",
|
||||||
"text": ""
|
"text": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/36",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Large operators and integrals are represented with n-ary objects in OMML XML:",
|
||||||
|
"text": "Large operators and integrals are represented with n-ary objects in OMML XML:",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": false,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/37",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "",
|
||||||
|
"text": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/38",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\sum_{0}^{2}x",
|
||||||
|
"text": "\\sum_{0}^{2}x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/39",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)",
|
||||||
|
"text": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/40",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\prod_{k=1}^{n}A_{k}",
|
||||||
|
"text": "\\prod_{k=1}^{n}A_{k}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/41",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\bigwedge_{}^{}x",
|
||||||
|
"text": "\\bigwedge_{}^{}x"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/42",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\int_{}^{}(2x+1)dx",
|
||||||
|
"text": "\\int_{}^{}(2x+1)dx"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/43",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\iint_{0}^{1}xdx",
|
||||||
|
"text": "\\iint_{0}^{1}xdx"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/44",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\iiint_{}^{}ydy",
|
||||||
|
"text": "\\iiint_{}^{}ydy"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/45",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\oint_{}^{}\\frac{dy}{dx}",
|
||||||
|
"text": "\\oint_{}^{}\\frac{dy}{dx}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/46",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\oiint_{0}^{2 \\pi }idt",
|
||||||
|
"text": "\\oiint_{0}^{2 \\pi }idt"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/47",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "formula",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "\\oiiint_{C}^{}\\frac{1}{z}dz",
|
||||||
|
"text": "\\oiiint_{C}^{}\\frac{1}{z}dz"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/48",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "",
|
||||||
|
"text": ""
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"pictures": [],
|
"pictures": [],
|
||||||
|
|||||||
@@ -26,4 +26,26 @@ This is a word document and this is an inline equation: $A= \pi r^{2}$ . If ins
|
|||||||
|
|
||||||
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
|
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
|
||||||
|
|
||||||
And that is an equation by itself. Cheers!
|
And that is an equation by itself. Cheers!
|
||||||
|
|
||||||
|
Large operators and integrals are represented with n-ary objects in OMML XML:
|
||||||
|
|
||||||
|
$$\sum_{0}^{2}x$$
|
||||||
|
|
||||||
|
$$\bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)$$
|
||||||
|
|
||||||
|
$$\prod_{k=1}^{n}A_{k}$$
|
||||||
|
|
||||||
|
$$\bigwedge_{}^{}x$$
|
||||||
|
|
||||||
|
$$\int_{}^{}(2x+1)dx$$
|
||||||
|
|
||||||
|
$$\iint_{0}^{1}xdx$$
|
||||||
|
|
||||||
|
$$\iiint_{}^{}ydy$$
|
||||||
|
|
||||||
|
$$\oint_{}^{}\frac{dy}{dx}$$
|
||||||
|
|
||||||
|
$$\oiint_{0}^{2 \pi }idt$$
|
||||||
|
|
||||||
|
$$\oiiint_{C}^{}\frac{1}{z}dz$$
|
||||||
Reference in New Issue
Block a user