Make the underscore-sequence fix trigger more rarely

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak 2024-10-25 10:10:24 +02:00
parent d654a292e8
commit 9b5b14f1a8

View File

@@ -1,5 +1,6 @@
import logging import logging
import re import re
import warnings
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Set, Union from typing import Set, Union
@@ -27,7 +28,7 @@ _log = logging.getLogger(__name__)
class MarkdownDocumentBackend(DeclarativeDocumentBackend): class MarkdownDocumentBackend(DeclarativeDocumentBackend):
def shorten_underscore_sequences(self, markdown_text, max_length=4): def shorten_underscore_sequences(self, markdown_text, max_length=10):
# This regex will match any sequence of underscores # This regex will match any sequence of underscores
pattern = r"_+" pattern = r"_+"
@@ -45,6 +46,9 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
# Use re.sub to replace long underscore sequences # Use re.sub to replace long underscore sequences
shortened_text = re.sub(pattern, replace_match, markdown_text) shortened_text = re.sub(pattern, replace_match, markdown_text)
if len(shortened_text) != len(markdown_text):
warnings.warn("Detected potentially incorrect Markdown, correcting...")
return shortened_text return shortened_text
def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):