Added three test files for right-to-left (RTL) text

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter Staar
2025-02-04 14:49:19 +01:00
parent 5db82d5b67
commit d7c9874a88
6 changed files with 115 additions and 144 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -248,13 +248,14 @@ def verify_conversion_result_v1(
doc_pred_md = doc_result.legacy_document.export_to_markdown()
doc_pred_dt = doc_result.legacy_document.export_to_document_tokens()
engine_suffix = "" if ocr_engine is None else f".{ocr_engine}"
gt_subpath = input_path.parent / "groundtruth" / "docling_v1" / input_path.name
if str(input_path.parent).endswith("pdf"):
gt_subpath = input_path.parent.parent / "groundtruth" / "docling_v1" / input_path.name
gt_subpath = (
input_path.parent.parent / "groundtruth" / "docling_v1" / input_path.name
)
pages_path = gt_subpath.with_suffix(f"{engine_suffix}.pages.json")
json_path = gt_subpath.with_suffix(f"{engine_suffix}.json")
md_path = gt_subpath.with_suffix(f"{engine_suffix}.md")
@@ -333,8 +334,10 @@ def verify_conversion_result_v2(
gt_subpath = input_path.parent / "groundtruth" / "docling_v2" / input_path.name
if str(input_path.parent).endswith("pdf"):
gt_subpath = input_path.parent.parent / "groundtruth" / "docling_v2" / input_path.name
gt_subpath = (
input_path.parent.parent / "groundtruth" / "docling_v2" / input_path.name
)
pages_path = gt_subpath.with_suffix(f"{engine_suffix}.pages.json")
json_path = gt_subpath.with_suffix(f"{engine_suffix}.json")
md_path = gt_subpath.with_suffix(f"{engine_suffix}.md")