mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
pre commit fixes, issue with pylatexenc
This commit is contained in:
parent
784eafbed5
commit
9b5e482d1e
@@ -5,35 +5,36 @@ Adapted from https://github.com/xiilei/dwml/blob/master/dwml/omml.py
|
|||||||
On 23/01/2025
|
On 23/01/2025
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pylatexenc.latexencode import UnicodeToLatexEncoder
|
|
||||||
import lxml.etree as ET
|
import lxml.etree as ET
|
||||||
|
import pylatexenc # type: ignore
|
||||||
|
|
||||||
from docling.backend.docx_latex.latex_dict import (
|
from docling.backend.docx_latex.latex_dict import (
|
||||||
|
ALN,
|
||||||
|
ARR,
|
||||||
|
BACKSLASH,
|
||||||
|
BLANK,
|
||||||
|
BRK,
|
||||||
CHARS,
|
CHARS,
|
||||||
CHR,
|
CHR,
|
||||||
CHR_BO,
|
CHR_BO,
|
||||||
CHR_DEFAULT,
|
CHR_DEFAULT,
|
||||||
POS,
|
|
||||||
POS_DEFAULT,
|
|
||||||
SUB,
|
|
||||||
SUP,
|
|
||||||
F,
|
|
||||||
F_DEFAULT,
|
|
||||||
T,
|
|
||||||
FUNC,
|
|
||||||
D,
|
|
||||||
D_DEFAULT,
|
D_DEFAULT,
|
||||||
RAD,
|
F_DEFAULT,
|
||||||
RAD_DEFAULT,
|
FUNC,
|
||||||
ARR,
|
FUNC_PLACE,
|
||||||
LIM_FUNC,
|
LIM_FUNC,
|
||||||
LIM_TO,
|
LIM_TO,
|
||||||
LIM_UPP,
|
LIM_UPP,
|
||||||
|
POS,
|
||||||
|
POS_DEFAULT,
|
||||||
|
RAD,
|
||||||
|
RAD_DEFAULT,
|
||||||
|
SUB,
|
||||||
|
SUP,
|
||||||
|
D,
|
||||||
|
F,
|
||||||
M,
|
M,
|
||||||
BRK,
|
T,
|
||||||
BLANK,
|
|
||||||
BACKSLASH,
|
|
||||||
ALN,
|
|
||||||
FUNC_PLACE,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
|
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
|
||||||
@@ -176,7 +177,7 @@ class oMath2Latex(Tag2Method):
|
|||||||
_t_dict = T
|
_t_dict = T
|
||||||
|
|
||||||
__direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
|
__direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
|
||||||
u = UnicodeToLatexEncoder(
|
u = pylatexenc.latexencode.UnicodeToLatexEncoder(
|
||||||
replacement_latex_protection="braces-all",
|
replacement_latex_protection="braces-all",
|
||||||
unknown_char_policy="keep",
|
unknown_char_policy="keep",
|
||||||
unknown_char_warning=False,
|
unknown_char_warning=False,
|
||||||
@@ -229,13 +230,22 @@ class oMath2Latex(Tag2Method):
|
|||||||
c_dict = self.process_children_dict(elm)
|
c_dict = self.process_children_dict(elm)
|
||||||
pr = c_dict["dPr"]
|
pr = c_dict["dPr"]
|
||||||
null = D_DEFAULT.get("null")
|
null = D_DEFAULT.get("null")
|
||||||
|
|
||||||
|
print(pr.text)
|
||||||
s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
|
s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
|
||||||
|
print(pr.begChr, D_DEFAULT.get("left"), s_val)
|
||||||
|
|
||||||
e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
|
e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
|
||||||
return pr.text + D.format(
|
print(pr.endChr, D_DEFAULT.get("right"), s_val)
|
||||||
|
|
||||||
|
delim = pr.text + D.format(
|
||||||
left=null if not s_val else escape_latex(s_val),
|
left=null if not s_val else escape_latex(s_val),
|
||||||
text=c_dict["e"],
|
text=c_dict["e"],
|
||||||
right=null if not e_val else escape_latex(e_val),
|
right=null if not e_val else escape_latex(e_val),
|
||||||
)
|
)
|
||||||
|
print(delim)
|
||||||
|
print()
|
||||||
|
return delim
|
||||||
|
|
||||||
def do_spre(self, elm):
|
def do_spre(self, elm):
|
||||||
"""
|
"""
|
||||||
|
@@ -19,11 +19,10 @@ from lxml.etree import XPath
|
|||||||
from PIL import Image, UnidentifiedImageError
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
|
from docling.backend.docx_latex.omml import oMath2Latex
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import InputDocument
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
from docling.backend.docx_latex.omml import oMath2Latex
|
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user