From d3eb6c5dd15b3ba110f891d81760f99c811d7f35 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Wed, 18 Sep 2024 17:01:42 +0200 Subject: [PATCH] Fix tests to have unique document_hashes per test Signed-off-by: Christoph Auer --- .github/workflows/checks.yml | 2 +- tests/test_backend_docling_parse.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 6f185d91..8e92e76e 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -18,7 +18,7 @@ jobs: run: poetry install --all-extras - name: Testing run: | - poetry run pytest -vvv -s tests + poetry run pytest -v tests - name: Run examples run: | for file in examples/*.py; do diff --git a/tests/test_backend_docling_parse.py b/tests/test_backend_docling_parse.py index f9442b05..4aafdc31 100644 --- a/tests/test_backend_docling_parse.py +++ b/tests/test_backend_docling_parse.py @@ -17,7 +17,7 @@ def test_doc_path(): def test_text_cell_counts(): pdf_doc = Path("./tests/data/redp5695.pdf") - doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz") + doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz5") for page_index in range(0, doc_backend.page_count()): last_cell_count = None @@ -36,7 +36,7 @@ def test_text_cell_counts(): def test_get_text_from_rect(test_doc_path): - doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") + doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz4") page_backend: DoclingParsePageBackend = doc_backend.load_page(0) # Get the title text of the DocLayNet paper @@ -46,10 +46,11 @@ def test_get_text_from_rect(test_doc_path): ref = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" assert textpiece.strip() == ref + doc_backend.unload() def test_crop_page_image(test_doc_path): - doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") + doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz3") page_backend: DoclingParsePageBackend = doc_backend.load_page(0) # Crop out "Figure 1" from the DocLayNet paper @@ -57,8 +58,10 @@ def test_crop_page_image(test_doc_path): scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527) ) # im.show() + doc_backend.unload() def test_num_pages(test_doc_path): - doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") + doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz2") doc_backend.page_count() == 9 + doc_backend.unload()