diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index 8c0c8d5d..1721bd9d 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -62,7 +62,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): """ images_scale = 1.66 # = 120 dpi, aligned with training data resolution - expansion_factor = 0.05 + expansion_factor = 0.03 def __init__( self, diff --git a/tests/data/code_and_formula.pdf b/tests/data/code_and_formula.pdf index d4f23d2e..258c5cb2 100644 Binary files a/tests/data/code_and_formula.pdf and b/tests/data/code_and_formula.pdf differ diff --git a/tests/test_code_formula.py b/tests/test_code_formula.py index 594f0306..2f3d1925 100644 --- a/tests/test_code_formula.py +++ b/tests/test_code_formula.py @@ -48,11 +48,11 @@ def test_code_and_formula_conversion(): code_blocks = [el for el in results if isinstance(el, CodeItem)] assert len(code_blocks) == 1 - gt = 'int main() {\n printf("Hello, World!");\n return 0;\n}' + gt = 'function add(a, b) {\n return a + b;\n}\nconsole.log(add(3, 5));' - predicted = code_blocks[0].text.strip() + predicted = code_blocks[0].text.strip() assert predicted == gt, f"mismatch in text {predicted=}, {gt=}" - assert code_blocks[0].code_language == CodeLanguageLabel.C_PLUS_PLUS + assert code_blocks[0].code_language == CodeLanguageLabel.JAVASCRIPT formula_blocks = [ el @@ -61,6 +61,6 @@ def test_code_and_formula_conversion(): ] assert len(formula_blocks) == 1 - gt = "a ^ { 2 } + 8 = 1 2" + gt = "x ^ { 2 } + 8 = 1 2" predicted = formula_blocks[0].text assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"