mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
fix: support escaped characters in markdown backend (#2304)
fix: improve markdown backend to support input documents with escaped characters

Signed-off-by: Lucas Morin <lucas.morin222@gmail.com>
This commit is contained in:
@@ -26,10 +26,12 @@ def test_convert_valid():
|
||||
assert len(relevant_paths) > 0
|
||||
|
||||
yaml_filter = ["inline_and_formatting", "mixed_without_h1"]
|
||||
json_filter = ["escaped_characters"]
|
||||
|
||||
for in_path in relevant_paths:
|
||||
md_gt_path = root_path / "groundtruth" / "docling_v2" / f"{in_path.name}.md"
|
||||
yaml_gt_path = root_path / "groundtruth" / "docling_v2" / f"{in_path.name}.yaml"
|
||||
json_gt_path = root_path / "groundtruth" / "docling_v2" / f"{in_path.name}.json"
|
||||
|
||||
in_doc = InputDocument(
|
||||
path_or_stream=in_path,
|
||||
@@ -45,6 +47,9 @@ def test_convert_valid():
|
||||
act_doc = backend.convert()
|
||||
act_data = act_doc.export_to_markdown()
|
||||
|
||||
if in_path.stem in json_filter:
|
||||
assert verify_document(act_doc, json_gt_path, GENERATE), "export to json"
|
||||
|
||||
if GEN_TEST_DATA:
|
||||
with open(md_gt_path, mode="w", encoding="utf-8") as f:
|
||||
f.write(f"{act_data}\n")
|
||||
|
||||
Reference in New Issue
Block a user