mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 15:32:30 +00:00
Removed print statements and redundant comments
Signed-off-by: Matteo Omenetti <omenetti.matteo@gmail.com>
This commit is contained in:
parent
a59c03b27f
commit
849aa759c7
@ -142,7 +142,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]:
|
def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]:
|
||||||
"""Extracts a programming language from the beginning of a (possibly multi-line) string.
|
"""Extracts a programming language from the beginning of a string.
|
||||||
|
|
||||||
This function checks if the input string starts with a pattern of the form
|
This function checks if the input string starts with a pattern of the form
|
||||||
``<_some_language_>``. If it does, it extracts the language string and returns
|
``<_some_language_>``. If it does, it extracts the language string and returns
|
||||||
@ -162,12 +162,6 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|||||||
- The second element is the extracted language if a match is found;
|
- The second element is the extracted language if a match is found;
|
||||||
otherwise, `None`.
|
otherwise, `None`.
|
||||||
"""
|
"""
|
||||||
# Explanation of the regex:
|
|
||||||
# ^<_([^>]+)_> : match "<_something_>" at the start, capturing "something" (Group 1)
|
|
||||||
# \s* : optional whitespace
|
|
||||||
# (.*) : capture everything after that in Group 2
|
|
||||||
#
|
|
||||||
# We also use re.DOTALL so that the (.*) part can include newlines.
|
|
||||||
pattern = r"^<_([^>]+)_>\s*(.*)"
|
pattern = r"^<_([^>]+)_>\s*(.*)"
|
||||||
match = re.match(pattern, input_string, flags=re.DOTALL)
|
match = re.match(pattern, input_string, flags=re.DOTALL)
|
||||||
if match:
|
if match:
|
||||||
@ -209,18 +203,17 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
|
|||||||
|
|
||||||
element_prov = element.prov[0]
|
element_prov = element.prov[0]
|
||||||
|
|
||||||
expansion_factor = 0.03 # Adjust the expansion percentage as needed
|
expansion_factor = 0.03
|
||||||
bbox = element_prov.bbox
|
bbox = element_prov.bbox
|
||||||
width = bbox.r - bbox.l
|
width = bbox.r - bbox.l
|
||||||
height = bbox.t - bbox.b
|
height = bbox.t - bbox.b
|
||||||
|
|
||||||
# Create the expanded bounding box
|
|
||||||
expanded_bbox = BoundingBox(
|
expanded_bbox = BoundingBox(
|
||||||
l=bbox.l - width * expansion_factor, # Expand left
|
l=bbox.l - width * expansion_factor,
|
||||||
t=bbox.t + height * expansion_factor, # Expand top
|
t=bbox.t + height * expansion_factor,
|
||||||
r=bbox.r + width * expansion_factor, # Expand right
|
r=bbox.r + width * expansion_factor,
|
||||||
b=bbox.b - height * expansion_factor, # Expand bottom
|
b=bbox.b - height * expansion_factor,
|
||||||
coord_origin=bbox.coord_origin, # Preserve coordinate origin
|
coord_origin=bbox.coord_origin,
|
||||||
)
|
)
|
||||||
|
|
||||||
page_ix = element_prov.page_no - 1
|
page_ix = element_prov.page_no - 1
|
||||||
|
@ -49,7 +49,6 @@ def test_code_and_formula_conversion():
|
|||||||
assert len(code_blocks) == 1
|
assert len(code_blocks) == 1
|
||||||
|
|
||||||
gt = 'public static void print() {\n System.out.println("Java Code");\n}'
|
gt = 'public static void print() {\n System.out.println("Java Code");\n}'
|
||||||
print(gt)
|
|
||||||
|
|
||||||
predicted = code_blocks[0].text.strip()
|
predicted = code_blocks[0].text.strip()
|
||||||
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
|
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
|
||||||
|
Loading…
Reference in New Issue
Block a user