fix(test): Add a `skip_cells` parameter to verify_conversion_result() that allows skipping the verification of the cells. It is used by the OCR tests.

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
2025-07-27 12:34:22 +00:00 · 2024-10-08 14:14:43 +02:00 · 2024-10-08 14:14:43 +02:00 · 29e65e911b
commit 29e65e911b
parent 072aaf6bb1
2 changed files with 16 additions and 9 deletions
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@@ -24,7 +24,7 @@ def save_output(pdf_path: Path, doc_result: ConversionResult, engine: str):
    import os
    parent = pdf_path.parent
-    eng = "" if engine is None else ".{engine}"
+    eng = "" if engine is None else f".{engine}"
    dict_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.json")
    with open(dict_fn, "w") as fd:
@@ -94,4 +94,5 @@ def test_e2e_conversions():
                input_path=pdf_path,
                doc_result=doc_result,
                generate=GENERATE,
                skip_cells=True,
            )
--- a/tests/verify_utils.py
+++ b/tests/verify_utils.py
@@ -130,7 +130,11 @@ def verify_dt(doc_pred_dt, doc_true_dt):
 def verify_conversion_result(
-    input_path: Path, doc_result: ConversionResult, generate=False
+    input_path: Path,
    doc_result: ConversionResult,
    generate: bool = False,
    ocr_engine: str = None,
    skip_cells: bool = False,
 ):
    PageList = TypeAdapter(List[Page])
@@ -143,10 +147,11 @@ def verify_conversion_result(
    doc_pred_md = doc_result.render_as_markdown()
    doc_pred_dt = doc_result.render_as_doctags()
-    pages_path = input_path.with_suffix(".pages.json")
+    engine_suffix = "" if ocr_engine is None else f".{ocr_engine}"
-    json_path = input_path.with_suffix(".json")
+    pages_path = input_path.with_suffix(f"{engine_suffix}.pages.json")
-    md_path = input_path.with_suffix(".md")
+    json_path = input_path.with_suffix(f"{engine_suffix}.json")
-    dt_path = input_path.with_suffix(".doctags.txt")
+    md_path = input_path.with_suffix(f"{engine_suffix}.md")
    dt_path = input_path.with_suffix(f"{engine_suffix}.doctags.txt")
    if generate:  # only used when re-generating truth
        with open(pages_path, "w") as fw:
@@ -173,6 +178,7 @@ def verify_conversion_result(
        with open(dt_path, "r") as fr:
            doc_true_dt = fr.read()
        if not skip_cells:
            assert verify_cells(
                doc_pred_pages, doc_true_pages
            ), f"Mismatch in PDF cell prediction for {input_path}"