Fix tests to have unique document_hashes per test

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-09-18 17:01:42 +02:00
parent 1b4c80acff
commit d3eb6c5dd1
2 changed files with 8 additions and 5 deletions

View File

@ -18,7 +18,7 @@ jobs:
run: poetry install --all-extras run: poetry install --all-extras
- name: Testing - name: Testing
run: | run: |
poetry run pytest -vvv -s tests poetry run pytest -v tests
- name: Run examples - name: Run examples
run: | run: |
for file in examples/*.py; do for file in examples/*.py; do

View File

@ -17,7 +17,7 @@ def test_doc_path():
def test_text_cell_counts(): def test_text_cell_counts():
pdf_doc = Path("./tests/data/redp5695.pdf") pdf_doc = Path("./tests/data/redp5695.pdf")
doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz") doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz5")
for page_index in range(0, doc_backend.page_count()): for page_index in range(0, doc_backend.page_count()):
last_cell_count = None last_cell_count = None
@ -36,7 +36,7 @@ def test_text_cell_counts():
def test_get_text_from_rect(test_doc_path): def test_get_text_from_rect(test_doc_path):
doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz4")
page_backend: DoclingParsePageBackend = doc_backend.load_page(0) page_backend: DoclingParsePageBackend = doc_backend.load_page(0)
# Get the title text of the DocLayNet paper # Get the title text of the DocLayNet paper
@ -46,10 +46,11 @@ def test_get_text_from_rect(test_doc_path):
ref = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" ref = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"
assert textpiece.strip() == ref assert textpiece.strip() == ref
doc_backend.unload()
def test_crop_page_image(test_doc_path): def test_crop_page_image(test_doc_path):
doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz3")
page_backend: DoclingParsePageBackend = doc_backend.load_page(0) page_backend: DoclingParsePageBackend = doc_backend.load_page(0)
# Crop out "Figure 1" from the DocLayNet paper # Crop out "Figure 1" from the DocLayNet paper
@ -57,8 +58,10 @@ def test_crop_page_image(test_doc_path):
scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527) scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
) )
# im.show() # im.show()
doc_backend.unload()
def test_num_pages(test_doc_path): def test_num_pages(test_doc_path):
doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz") doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz2")
doc_backend.page_count() == 9 doc_backend.page_count() == 9
doc_backend.unload()