diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index e4d56945..3c0be463 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -62,7 +62,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): """ images_scale = 1.66 # = 120 dpi, aligned with training data resolution - expansion_factor = 0.03 + expansion_factor = 0.1 def __init__( self, @@ -118,7 +118,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): repo_id="ds4sd/CodeFormula", force_download=force, local_dir=local_dir, - revision="v1.0.0", + revision="v1.0.1", ) return Path(download_path) diff --git a/tests/data/code_and_formula.pdf b/tests/data/code_and_formula.pdf index 82cd8343..d4f23d2e 100644 Binary files a/tests/data/code_and_formula.pdf and b/tests/data/code_and_formula.pdf differ diff --git a/tests/test_code_formula.py b/tests/test_code_formula.py index 05e87246..594f0306 100644 --- a/tests/test_code_formula.py +++ b/tests/test_code_formula.py @@ -48,11 +48,11 @@ def test_code_and_formula_conversion(): code_blocks = [el for el in results if isinstance(el, CodeItem)] assert len(code_blocks) == 1 - gt = 'public static void print() {\n System.out.println("Java Code");\n}' + gt = 'int main() {\n printf("Hello, World!");\n return 0;\n}' predicted = code_blocks[0].text.strip() assert predicted == gt, f"mismatch in text {predicted=}, {gt=}" - assert code_blocks[0].code_language == CodeLanguageLabel.JAVA + assert code_blocks[0].code_language == CodeLanguageLabel.C_PLUS_PLUS formula_blocks = [ el