fix: bumped the glm version and adjusted the tests (#83)

* bumped the glm version and adjusted the tests

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* updated the poetry lock

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fix hooks

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fixed the tests

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* reformatted the code

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added the tests for tables

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Peter W. J. Staar
2024-09-18 07:43:49 +02:00
committed by GitHub
parent 8242bce4fa
commit 442443a102
11 changed files with 406 additions and 361 deletions

View File

@@ -96,10 +96,17 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
for i, row in enumerate(true_item.data):
for j, col in enumerate(true_item.data[i]):
# print("true: ", true_item.data[i][j])
# print("pred: ", pred_item.data[i][j])
assert (
true_item.data[i][j].text == pred_item.data[i][j].text
), "table-cell does not have the same text"
assert (
true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type
), "table-cell does not have the same type"
return True
@@ -156,9 +163,13 @@ def verify_conversion_result(
), f"Mismatch in PDF cell prediction for {input_path}"
# assert verify_output(
# doc_pred, doc_true
# doc_pred, doc_true
# ), f"Mismatch in JSON prediction for {input_path}"
assert verify_tables(
doc_pred, doc_true
), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
assert verify_md(
doc_pred_md, doc_true_md
), f"Mismatch in Markdown prediction for {input_path}"