pre-commit fixes, issue with pylatexenc

This commit is contained in:
Rafael Teixeira de Lima 2025-01-27 10:02:21 +01:00
parent 784eafbed5
commit 9b5e482d1e
2 changed files with 31 additions and 22 deletions

View File

@ -5,35 +5,36 @@ Adapted from https://github.com/xiilei/dwml/blob/master/dwml/omml.py
On 23/01/2025 On 23/01/2025
""" """
from pylatexenc.latexencode import UnicodeToLatexEncoder
import lxml.etree as ET import lxml.etree as ET
import pylatexenc # type: ignore
from docling.backend.docx_latex.latex_dict import ( from docling.backend.docx_latex.latex_dict import (
ALN,
ARR,
BACKSLASH,
BLANK,
BRK,
CHARS, CHARS,
CHR, CHR,
CHR_BO, CHR_BO,
CHR_DEFAULT, CHR_DEFAULT,
POS,
POS_DEFAULT,
SUB,
SUP,
F,
F_DEFAULT,
T,
FUNC,
D,
D_DEFAULT, D_DEFAULT,
RAD, F_DEFAULT,
RAD_DEFAULT, FUNC,
ARR, FUNC_PLACE,
LIM_FUNC, LIM_FUNC,
LIM_TO, LIM_TO,
LIM_UPP, LIM_UPP,
POS,
POS_DEFAULT,
RAD,
RAD_DEFAULT,
SUB,
SUP,
D,
F,
M, M,
BRK, T,
BLANK,
BACKSLASH,
ALN,
FUNC_PLACE,
) )
OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}" OMML_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/math}"
@ -176,7 +177,7 @@ class oMath2Latex(Tag2Method):
_t_dict = T _t_dict = T
__direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e") __direct_tags = ("box", "sSub", "sSup", "sSubSup", "num", "den", "deg", "e")
u = UnicodeToLatexEncoder( u = pylatexenc.latexencode.UnicodeToLatexEncoder(
replacement_latex_protection="braces-all", replacement_latex_protection="braces-all",
unknown_char_policy="keep", unknown_char_policy="keep",
unknown_char_warning=False, unknown_char_warning=False,
@ -229,13 +230,22 @@ class oMath2Latex(Tag2Method):
c_dict = self.process_children_dict(elm) c_dict = self.process_children_dict(elm)
pr = c_dict["dPr"] pr = c_dict["dPr"]
null = D_DEFAULT.get("null") null = D_DEFAULT.get("null")
print(pr.text)
s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T) s_val = get_val(pr.begChr, default=D_DEFAULT.get("left"), store=T)
print(pr.begChr, D_DEFAULT.get("left"), s_val)
e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T) e_val = get_val(pr.endChr, default=D_DEFAULT.get("right"), store=T)
return pr.text + D.format( print(pr.endChr, D_DEFAULT.get("right"), s_val)
delim = pr.text + D.format(
left=null if not s_val else escape_latex(s_val), left=null if not s_val else escape_latex(s_val),
text=c_dict["e"], text=c_dict["e"],
right=null if not e_val else escape_latex(e_val), right=null if not e_val else escape_latex(e_val),
) )
print(delim)
print()
return delim
def do_spre(self, elm): def do_spre(self, elm):
""" """

View File

@ -19,11 +19,10 @@ from lxml.etree import XPath
from PIL import Image, UnidentifiedImageError from PIL import Image, UnidentifiedImageError
from docling.backend.abstract_backend import DeclarativeDocumentBackend from docling.backend.abstract_backend import DeclarativeDocumentBackend
from docling.backend.docx_latex.omml import oMath2Latex
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument from docling.datamodel.document import InputDocument
from docling.backend.docx_latex.omml import oMath2Latex
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)