mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-23 18:45:00 +00:00
Revert "Add multi-page TIFF test data and verification tests"
This reverts commit 130a10e2d9
.
This commit is contained in:
parent
d571f36299
commit
2aab66288b
BIN
tests/data/tiff/multipage_2pages.tif
vendored
BIN
tests/data/tiff/multipage_2pages.tif
vendored
Binary file not shown.
BIN
tests/data/tiff/multipage_3pages.tif
vendored
BIN
tests/data/tiff/multipage_3pages.tif
vendored
Binary file not shown.
BIN
tests/data/tiff/multipage_4pages.tif
vendored
BIN
tests/data/tiff/multipage_4pages.tif
vendored
Binary file not shown.
BIN
tests/data/tiff/single_page.tif
vendored
BIN
tests/data/tiff/single_page.tif
vendored
Binary file not shown.
@ -1,139 +0,0 @@
|
||||
"""
|
||||
Test file for multi-page TIFF functionality.
|
||||
|
||||
This file tests that multi-page TIFF files are properly handled by the
|
||||
image-to-PDF conversion logic in the PDF backend.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def get_tiff_test_paths():
|
||||
"""Get all TIFF test files from the test data directory."""
|
||||
directory = Path("./tests/data/tiff/")
|
||||
if not directory.exists():
|
||||
return []
|
||||
|
||||
# List all TIFF files in the directory
|
||||
tiff_files = sorted(directory.rglob("*.tif")) + sorted(directory.rglob("*.tiff"))
|
||||
return tiff_files
|
||||
|
||||
|
||||
def test_tiff_test_files_exist():
|
||||
"""Verify that TIFF test files exist and have the expected properties."""
|
||||
tiff_paths = get_tiff_test_paths()
|
||||
|
||||
assert len(tiff_paths) > 0, "No TIFF test files found in tests/data/tiff/"
|
||||
|
||||
# Expected test files
|
||||
expected_files = {
|
||||
"single_page.tif": 1,
|
||||
"multipage_2pages.tif": 2,
|
||||
"multipage_3pages.tif": 3,
|
||||
"multipage_4pages.tif": 4,
|
||||
}
|
||||
|
||||
found_files = {p.name: p for p in tiff_paths}
|
||||
|
||||
for expected_file, expected_pages in expected_files.items():
|
||||
assert expected_file in found_files, f"Expected test file {expected_file} not found"
|
||||
|
||||
# Verify the file has the expected number of pages
|
||||
with Image.open(found_files[expected_file]) as img:
|
||||
if hasattr(img, 'n_frames'):
|
||||
actual_pages = img.n_frames
|
||||
else:
|
||||
actual_pages = 1
|
||||
|
||||
assert actual_pages == expected_pages, (
|
||||
f"File {expected_file} should have {expected_pages} pages, "
|
||||
f"but has {actual_pages}"
|
||||
)
|
||||
|
||||
|
||||
def test_multipage_tiff_image_properties():
|
||||
"""Test that our multi-page TIFF files have the correct structure."""
|
||||
tiff_paths = get_tiff_test_paths()
|
||||
|
||||
if not tiff_paths:
|
||||
print("SKIP: No TIFF test files found")
|
||||
return
|
||||
|
||||
multipage_files = [p for p in tiff_paths if "multipage" in p.name]
|
||||
|
||||
for tiff_path in multipage_files:
|
||||
with Image.open(tiff_path) as img:
|
||||
# Should be a multi-page image
|
||||
assert hasattr(img, 'n_frames'), f"{tiff_path.name} should be multi-page"
|
||||
assert img.n_frames > 1, f"{tiff_path.name} should have more than 1 frame"
|
||||
|
||||
# Verify we can seek through all frames
|
||||
frames_accessible = 0
|
||||
try:
|
||||
for i in range(img.n_frames):
|
||||
img.seek(i)
|
||||
frames_accessible += 1
|
||||
|
||||
# Verify frame is valid
|
||||
assert img.size[0] > 0 and img.size[1] > 0, (
|
||||
f"Frame {i} in {tiff_path.name} has invalid size"
|
||||
)
|
||||
except EOFError:
|
||||
pass
|
||||
|
||||
assert frames_accessible == img.n_frames, (
|
||||
f"Could only access {frames_accessible} of {img.n_frames} frames "
|
||||
f"in {tiff_path.name}"
|
||||
)
|
||||
|
||||
|
||||
def test_single_page_tiff_properties():
|
||||
"""Test that single-page TIFF files have the correct structure."""
|
||||
tiff_paths = get_tiff_test_paths()
|
||||
|
||||
if not tiff_paths:
|
||||
print("SKIP: No TIFF test files found")
|
||||
return
|
||||
|
||||
single_page_files = [p for p in tiff_paths if "single_page" in p.name]
|
||||
|
||||
if not single_page_files:
|
||||
print("SKIP: No single-page TIFF test files found")
|
||||
return
|
||||
|
||||
for tiff_path in single_page_files:
|
||||
with Image.open(tiff_path) as img:
|
||||
# Should be a single-page image
|
||||
if hasattr(img, 'n_frames'):
|
||||
assert img.n_frames == 1, f"{tiff_path.name} should have exactly 1 frame"
|
||||
|
||||
# Verify image is valid
|
||||
assert img.size[0] > 0 and img.size[1] > 0, (
|
||||
f"{tiff_path.name} has invalid size"
|
||||
)
|
||||
|
||||
|
||||
def get_expected_pages_from_filename(filename: str) -> int:
|
||||
"""Extract expected number of pages from test file names."""
|
||||
filename_lower = filename.lower()
|
||||
|
||||
if "single_page" in filename_lower:
|
||||
return 1
|
||||
elif "2pages" in filename_lower:
|
||||
return 2
|
||||
elif "3pages" in filename_lower:
|
||||
return 3
|
||||
elif "4pages" in filename_lower:
|
||||
return 4
|
||||
else:
|
||||
# For any other files, assume single page
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# When run directly, execute the tests
|
||||
test_tiff_test_files_exist()
|
||||
test_multipage_tiff_image_properties()
|
||||
test_single_page_tiff_properties()
|
||||
print("All TIFF test file validation passed!")
|
Loading…
Reference in New Issue
Block a user