mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix: fix single newline handling in MD backend (#824)
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
35
tests/test_backend_markdown.py
Normal file
35
tests/test_backend_markdown.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from pathlib import Path
|
||||
|
||||
from docling.backend.md_backend import MarkdownDocumentBackend
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import InputDocument
|
||||
|
||||
|
||||
def test_convert_valid():
    """Convert every Markdown fixture and compare it with its ground truth.

    Each ``*.md`` file found recursively under ``tests/data/md`` is loaded
    through ``MarkdownDocumentBackend``, converted, and exported back to
    Markdown. The export must equal the content of
    ``tests/data/groundtruth/docling_v2/<fixture file name>.md`` with its
    trailing whitespace stripped.
    """
    data_root = Path("tests") / "data"
    groundtruth_dir = data_root / "groundtruth" / "docling_v2"
    backend_cls = MarkdownDocumentBackend

    md_files = sorted((data_root / "md").rglob("*.md"))
    # Guard against a vacuously passing test when no fixtures are discovered.
    assert len(md_files) > 0

    for md_file in md_files:
        # The ground-truth name keeps the fixture's full file name (suffix
        # included) and appends another ".md".
        expected_path = groundtruth_dir / f"{md_file.name}.md"

        input_doc = InputDocument(
            path_or_stream=md_file,
            format=InputFormat.MD,
            backend=backend_cls,
        )
        md_backend = backend_cls(in_doc=input_doc, path_or_stream=md_file)
        assert md_backend.is_valid()

        actual_md = md_backend.convert().export_to_markdown()

        # Only the ground truth is right-stripped; the export is compared as-is.
        expected_md = expected_path.read_text(encoding="utf-8").rstrip()
        assert actual_md == expected_md
|
||||
Reference in New Issue
Block a user