feat: new torch-based docling models (#120)

---------

Signed-off-by: Maxim Lysak <mly@zurich.ibm.com>
Co-authored-by: Maxim Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maxim Lysak
2024-10-03 18:42:33 +02:00
committed by GitHub
parent 9ebbbc1245
commit 2422f706a1
30 changed files with 1159 additions and 1185 deletions

View File

@@ -77,6 +77,8 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
assert doc_pred.tables is not None, "no tables predicted, but expected in doc_true"
# print("Expected number of tables: {}, result: {}".format(len(doc_true.tables), len(doc_pred.tables)))
assert len(doc_true.tables) == len(
doc_pred.tables
), "document has different count of tables than expected."
@@ -96,8 +98,9 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
for i, row in enumerate(true_item.data):
for j, col in enumerate(true_item.data[i]):
# print("true: ", true_item.data[i][j])
# print("pred: ", pred_item.data[i][j])
# print("true: ", true_item.data[i][j].text)
# print("pred: ", pred_item.data[i][j].text)
# print("")
assert (
true_item.data[i][j].text == pred_item.data[i][j].text