From 849aa759c73d125ccd0232ee74ab87b521c7061e Mon Sep 17 00:00:00 2001 From: Matteo Omenetti Date: Thu, 23 Jan 2025 07:38:39 -0500 Subject: [PATCH] removed print statements Signed-off-by: Matteo Omenetti --- docling/models/code_formula_model.py | 21 +++++++-------------- tests/test_code_formula.py | 1 - 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index ca48980d..1eb9e74d 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -142,7 +142,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): ) def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]: - """Extracts a programming language from the beginning of a (possibly multi-line) string. + """Extracts a programming language from the beginning of a string. This function checks if the input string starts with a pattern of the form ``<_some_language_>``. If it does, it extracts the language string and returns @@ -162,12 +162,6 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): - The second element is the extracted language if a match is found; otherwise, `None`. """ - # Explanation of the regex: - # ^<_([^>]+)> : match "<_something>" at the start, capturing "something" (Group 1) - # \s* : optional whitespace - # (.*) : capture everything after that in Group 2 - # - # We also use re.DOTALL so that the (.*) part can include newlines. pattern = r"^<_([^>]+)_>\s*(.*)" match = re.match(pattern, input_string, flags=re.DOTALL) if match: @@ -209,18 +203,17 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel): element_prov = element.prov[0] - expansion_factor = 0.03 # Adjust the expansion percentage as needed + expansion_factor = 0.03 bbox = element_prov.bbox width = bbox.r - bbox.l height = bbox.t - bbox.b - # Create the expanded bounding box expanded_bbox = BoundingBox( - l=bbox.l - width * expansion_factor, # Expand left - t=bbox.t + height * expansion_factor, # Expand top - r=bbox.r + width * expansion_factor, # Expand right - b=bbox.b - height * expansion_factor, # Expand bottom - coord_origin=bbox.coord_origin, # Preserve coordinate origin + l=bbox.l - width * expansion_factor, + t=bbox.t + height * expansion_factor, + r=bbox.r + width * expansion_factor, + b=bbox.b - height * expansion_factor, + coord_origin=bbox.coord_origin, ) page_ix = element_prov.page_no - 1 diff --git a/tests/test_code_formula.py b/tests/test_code_formula.py index f7843286..05e87246 100644 --- a/tests/test_code_formula.py +++ b/tests/test_code_formula.py @@ -49,7 +49,6 @@ def test_code_and_formula_conversion(): assert len(code_blocks) == 1 gt = 'public static void print() {\n System.out.println("Java Code");\n}' - print(gt) predicted = code_blocks[0].text.strip() assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"