From 5d090c59c42020a1f17c215aae5ca7d8bac632c9 Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Thu, 24 Oct 2024 13:28:03 +0200 Subject: [PATCH] Added comment explaining reason for fix Signed-off-by: Maksym Lysak --- docling/backend/md_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index 5b3043b9..a1de7f5a 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -27,6 +27,8 @@ _log = logging.getLogger(__name__) class MarkdownDocumentBackend(DeclarativeDocumentBackend): def clean_md(self, md_text): + # Long sequences of unescaped underscore symbols "_" hang the parser + # Up to 3 characters "___" are allowed to represent italic, bold, and bold-italic res_text = md_text.replace("____", "") return res_text