fix(markdown): ensure correct parsing of nested lists (#1995)

* fix(markdown): ensure correct parsing of nested lists

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

* chore: update dependencies in uv.lock file

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>

---------

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis
2025-07-25 15:17:57 +02:00
committed by GitHub
parent 1985841a19
commit aec29a7315
6 changed files with 765 additions and 550 deletions

View File

@@ -16,7 +16,7 @@ def test_convert_valid():
relevant_paths = sorted((root_path / "md").rglob("*.md"))
assert len(relevant_paths) > 0
yaml_filter = ["inline_and_formatting"]
yaml_filter = ["inline_and_formatting", "mixed_without_h1"]
for in_path in relevant_paths:
md_gt_path = root_path / "groundtruth" / "docling_v2" / f"{in_path.name}.md"
@@ -41,12 +41,11 @@ def test_convert_valid():
f.write(f"{act_data}\n")
if in_path.stem in yaml_filter:
with open(yaml_gt_path, mode="w", encoding="utf-8") as f:
act_doc.save_as_yaml(
yaml_gt_path,
coord_precision=COORD_PREC,
confid_precision=CONFID_PREC,
)
act_doc.save_as_yaml(
yaml_gt_path,
coord_precision=COORD_PREC,
confid_precision=CONFID_PREC,
)
else:
with open(md_gt_path, encoding="utf-8") as f:
exp_data = f.read().rstrip()
@@ -54,4 +53,4 @@ def test_convert_valid():
if in_path.stem in yaml_filter:
exp_doc = DoclingDocument.load_from_yaml(yaml_gt_path)
assert act_doc == exp_doc
assert act_doc == exp_doc, f"export to yaml failed on {in_path}"