diff --git a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index dc96c30f..306449b0 100644 --- a/docling/models/page_preprocessing_model.py +++ b/docling/models/page_preprocessing_model.py @@ -125,8 +125,8 @@ class PagePreprocessingModel(BasePageModel): penalty += 0.1 * len(frag_matches) # Additional heuristic: if the average token length is below 2, add a penalty. - tokens = text.split() - if tokens and (sum(map(len, tokens)) / len(tokens)) < 2: - penalty += 0.2 + # tokens = text.split() + # if tokens and (sum(map(len, tokens)) / len(tokens)) < 2: + # penalty += 0.2 return max(1.0 - penalty, 0.0)