Mirror of https://github.com/DS4SD/docling.git
ran pre-commit
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
parent c64489a82c
commit 24c0b9d4c9
@@ -16,7 +16,9 @@ from docling.datamodel.document import ConvertedDocument
 class GlmModel:
     def __init__(self, config):
         self.config = config
-        self.model_names = self.config.get("model_names", "") #"language;term;reference"
+        self.model_names = self.config.get(
+            "model_names", ""
+        )  # "language;term;reference"
         load_pretrained_nlp_models()
         # model = init_nlp_model(model_names="language;term;reference")
         model = init_nlp_model(model_names=self.model_names)
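The first hunk is a pure line-length rewrap: `dict.get` with a default behaves identically whether the call sits on one line or three. A standalone sketch (not part of the commit) illustrating the equivalence:

    config = {"model_names": "language;term;reference"}

    # Both spellings call dict.get with the same arguments; the formatter only
    # rewraps the call to respect the line-length limit.
    one_liner = config.get("model_names", "")  # "language;term;reference"
    wrapped = config.get(
        "model_names", ""
    )  # "language;term;reference"

    assert one_liner == wrapped
    assert {}.get("model_names", "") == ""  # the default is returned when the key is missing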
@@ -1,5 +1,5 @@
-import json
 import glob
+import json
 from pathlib import Path
 
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
@@ -8,6 +8,7 @@ from docling.document_converter import DocumentConverter
 
 GENERATE = False
 
+
 def get_pdf_paths():
 
     # Define the directory you want to search
@@ -17,6 +18,7 @@ def get_pdf_paths():
     pdf_files = sorted(directory.rglob("*.pdf"))
     return pdf_files
 
+
 def verify_json(doc_pred_json, doc_true_json):
 
     if doc_pred_json.keys() != doc_true_json.keys():
@@ -39,18 +41,28 @@ def verify_json(doc_pred_json, doc_true_json):
         pred_item = doc_pred_json["output"]["tables"][l]
 
         assert "data" in pred_item, f"`data` is in {pred_item}"
-        assert len(true_item["data"])==len(pred_item["data"]), "table does not have the same #-rows"
-        assert len(true_item["data"][0])==len(pred_item["data"][0]), "table does not have the same #-cols"
+        assert len(true_item["data"]) == len(
+            pred_item["data"]
+        ), "table does not have the same #-rows"
+        assert len(true_item["data"][0]) == len(
+            pred_item["data"][0]
+        ), "table does not have the same #-cols"
 
         for i, row in enumerate(true_item["data"]):
             for j, col in enumerate(true_item["data"][i]):
 
                 if "text" in true_item["data"][i][j]:
-                    assert "text" in pred_item["data"][i][j], "table-cell does not contain text"
-                    assert true_item["data"][i][j]["text"]==pred_item["data"][i][j]["text"], "table-cell does not have the same text"
+                    assert (
+                        "text" in pred_item["data"][i][j]
+                    ), "table-cell does not contain text"
+                    assert (
+                        true_item["data"][i][j]["text"]
+                        == pred_item["data"][i][j]["text"]
+                    ), "table-cell does not have the same text"
 
     return True
 
 
 def verify_md(doc_pred_md, doc_true_md):
     return doc_pred_md == doc_true_md
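A note on the wrapped asserts in the last hunk: the formatter parenthesizes only the condition and leaves the failure message outside the parentheses. Wrapping the whole `condition, message` pair instead would build a two-element tuple, which is always truthy, so the assertion could never fail. A small illustration with hypothetical values (not from the test):

    rows_true, rows_pred = 3, 4

    # Correct wrapping, as produced in the diff: parentheses around the condition only.
    try:
        assert (
            rows_true == rows_pred
        ), "table does not have the same #-rows"
    except AssertionError as err:
        print(err)  # fires as expected

    # Incorrect wrapping: the parentheses create a non-empty tuple, which is truthy,
    # so this assertion silently passes (CPython even emits "assertion is always true").
    assert (rows_true == rows_pred, "table does not have the same #-rows")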