mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 06:08:09 +00:00
fix(markdown): make parsing of rich table cells valid (#1821)
* fix: update md table classification
  Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
* Fix ground truth header changes
  Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
* Fix merge issues
  Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
* Fix minor ground truth errors
  Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
---------
Signed-off-by: Michael Honaker <Michael.Honaker@ibm.com>
This commit is contained in:
@@ -335,7 +335,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
_log.debug(f" - Paragraph (raw text): {element.children}")
|
||||
snippet_text = element.children.strip()
|
||||
# Detect start of the table:
|
||||
if "|" in snippet_text:
|
||||
if "|" in snippet_text or self.in_table:
|
||||
# most likely part of the markdown table
|
||||
self.in_table = True
|
||||
if len(self.md_table_buffer) > 0:
|
||||
|
||||
Reference in New Issue
Block a user