Switch to CodeFormula model v1.0.1 and new test PDF

Signed-off-by: Matteo-Omenetti <Matteo.Omenetti1@ibm.com>
This commit is contained in:
Matteo-Omenetti 2025-02-04 13:28:13 +01:00
parent b5da4080c9
commit 48c57144d2
3 changed files with 4 additions and 4 deletions

View File

@@ -62,7 +62,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
"""
images_scale = 1.66 # = 120 dpi, aligned with training data resolution
expansion_factor = 0.03
expansion_factor = 0.1
def __init__(
self,
@@ -118,7 +118,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
repo_id="ds4sd/CodeFormula",
force_download=force,
local_dir=local_dir,
revision="v1.0.0",
revision="v1.0.1",
)
return Path(download_path)

Binary file not shown.

View File

@@ -48,11 +48,11 @@ def test_code_and_formula_conversion():
code_blocks = [el for el in results if isinstance(el, CodeItem)]
assert len(code_blocks) == 1
gt = 'public static void print() {\n System.out.println("Java Code");\n}'
gt = 'int main() {\n printf("Hello, World!");\n return 0;\n}'
predicted = code_blocks[0].text.strip()
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
assert code_blocks[0].code_language == CodeLanguageLabel.JAVA
assert code_blocks[0].code_language == CodeLanguageLabel.C_PLUS_PLUS
formula_blocks = [
el