mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix(docx): parse integrals as n-ary objects without chr element (#2712)
Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
f80c903c24
commit
c97715f5fd
@@ -65,6 +65,11 @@ CHR_BO = {
|
||||
"\u2210": "\\coprod",
|
||||
"\u2211": "\\sum",
|
||||
"\u222b": "\\int",
|
||||
"\u222c": "\\iint",
|
||||
"\u222d": "\\iiint",
|
||||
"\u222e": "\\oint",
|
||||
"\u222f": "\\oiint",
|
||||
"\u2230": "\\oiiint",
|
||||
"\u22c0": "\\bigwedge",
|
||||
"\u22c1": "\\bigvee",
|
||||
"\u22c2": "\\bigcap",
|
||||
|
||||
@@ -381,7 +381,8 @@ class oMath2Latex(Tag2Method):
|
||||
bo = ""
|
||||
for stag, t, e in self.process_children_list(elm):
|
||||
if stag == "naryPr":
|
||||
bo = get_val(t.chr, store=CHR_BO)
|
||||
# if <m:naryPr> contains no <m:chr>, the n-ary represents an integral
|
||||
bo = get_val(t.chr, default="\\int", store=CHR_BO)
|
||||
else:
|
||||
res.append(t)
|
||||
return bo + BLANK.join(res)
|
||||
|
||||
BIN
tests/data/docx/equations.docx
vendored
BIN
tests/data/docx/equations.docx
vendored
Binary file not shown.
@@ -37,4 +37,17 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-36 at level 1: formula: e^{x}=1+\frac{x}{1!}+\frac{x^{2} ... xtellipsis } , - \infty < x < \infty
|
||||
item-37 at level 1: text:
|
||||
item-38 at level 1: text: And that is an equation by itself. Cheers!
|
||||
item-39 at level 1: text:
|
||||
item-39 at level 1: text:
|
||||
item-40 at level 1: text: Large operators and integrals ar ... sented with n-ary objects in OMML XML:
|
||||
item-41 at level 1: text:
|
||||
item-42 at level 1: formula: \sum_{0}^{2}x
|
||||
item-43 at level 1: formula: \bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)
|
||||
item-44 at level 1: formula: \prod_{k=1}^{n}A_{k}
|
||||
item-45 at level 1: formula: \bigwedge_{}^{}x
|
||||
item-46 at level 1: formula: \int_{}^{}(2x+1)dx
|
||||
item-47 at level 1: formula: \iint_{0}^{1}xdx
|
||||
item-48 at level 1: formula: \iiint_{}^{}ydy
|
||||
item-49 at level 1: formula: \oint_{}^{}\frac{dy}{dx}
|
||||
item-50 at level 1: formula: \oiint_{0}^{2 \pi }idt
|
||||
item-51 at level 1: formula: \oiiint_{C}^{}\frac{1}{z}dz
|
||||
item-52 at level 1: text:
|
||||
@@ -1,10 +1,10 @@
|
||||
{
|
||||
"schema_name": "DoclingDocument",
|
||||
"version": "1.7.0",
|
||||
"version": "1.8.0",
|
||||
"name": "equations",
|
||||
"origin": {
|
||||
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"binary_hash": 11121138535595486899,
|
||||
"binary_hash": 8638432756089077257,
|
||||
"filename": "equations.docx"
|
||||
},
|
||||
"furniture": {
|
||||
@@ -106,6 +106,45 @@
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/35"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/36"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/37"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/38"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/39"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/40"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/41"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/42"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/43"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/44"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/45"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/46"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/47"
|
||||
},
|
||||
{
|
||||
"$ref": "#/texts/48"
|
||||
}
|
||||
],
|
||||
"content_layer": "body",
|
||||
@@ -655,6 +694,169 @@
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/36",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "Large operators and integrals are represented with n-ary objects in OMML XML:",
|
||||
"text": "Large operators and integrals are represented with n-ary objects in OMML XML:",
|
||||
"formatting": {
|
||||
"bold": false,
|
||||
"italic": false,
|
||||
"underline": false,
|
||||
"strikethrough": false,
|
||||
"script": "baseline"
|
||||
}
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/37",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/38",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\sum_{0}^{2}x",
|
||||
"text": "\\sum_{0}^{2}x"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/39",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)",
|
||||
"text": "\\bigcup_{n=1}^{m}\\left(X_{n} \\cap Y_{n}\\right)"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/40",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\prod_{k=1}^{n}A_{k}",
|
||||
"text": "\\prod_{k=1}^{n}A_{k}"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/41",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\bigwedge_{}^{}x",
|
||||
"text": "\\bigwedge_{}^{}x"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/42",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\int_{}^{}(2x+1)dx",
|
||||
"text": "\\int_{}^{}(2x+1)dx"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/43",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\iint_{0}^{1}xdx",
|
||||
"text": "\\iint_{0}^{1}xdx"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/44",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\iiint_{}^{}ydy",
|
||||
"text": "\\iiint_{}^{}ydy"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/45",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\oint_{}^{}\\frac{dy}{dx}",
|
||||
"text": "\\oint_{}^{}\\frac{dy}{dx}"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/46",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\oiint_{0}^{2 \\pi }idt",
|
||||
"text": "\\oiint_{0}^{2 \\pi }idt"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/47",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "formula",
|
||||
"prov": [],
|
||||
"orig": "\\oiiint_{C}^{}\\frac{1}{z}dz",
|
||||
"text": "\\oiiint_{C}^{}\\frac{1}{z}dz"
|
||||
},
|
||||
{
|
||||
"self_ref": "#/texts/48",
|
||||
"parent": {
|
||||
"$ref": "#/body"
|
||||
},
|
||||
"children": [],
|
||||
"content_layer": "body",
|
||||
"label": "text",
|
||||
"prov": [],
|
||||
"orig": "",
|
||||
"text": ""
|
||||
}
|
||||
],
|
||||
"pictures": [],
|
||||
|
||||
@@ -26,4 +26,26 @@ This is a word document and this is an inline equation: $A= \pi r^{2}$ . If ins
|
||||
|
||||
$$e^{x}=1+\frac{x}{1!}+\frac{x^{2}}{2!}+\frac{x^{3}}{3!}+ \text{ \textellipsis } , - \infty < x < \infty$$
|
||||
|
||||
And that is an equation by itself. Cheers!
|
||||
And that is an equation by itself. Cheers!
|
||||
|
||||
Large operators and integrals are represented with n-ary objects in OMML XML:
|
||||
|
||||
$$\sum_{0}^{2}x$$
|
||||
|
||||
$$\bigcup_{n=1}^{m}\left(X_{n} \cap Y_{n}\right)$$
|
||||
|
||||
$$\prod_{k=1}^{n}A_{k}$$
|
||||
|
||||
$$\bigwedge_{}^{}x$$
|
||||
|
||||
$$\int_{}^{}(2x+1)dx$$
|
||||
|
||||
$$\iint_{0}^{1}xdx$$
|
||||
|
||||
$$\iiint_{}^{}ydy$$
|
||||
|
||||
$$\oint_{}^{}\frac{dy}{dx}$$
|
||||
|
||||
$$\oiint_{0}^{2 \pi }idt$$
|
||||
|
||||
$$\oiiint_{C}^{}\frac{1}{z}dz$$
|
||||
Reference in New Issue
Block a user