-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathtest_atlas.py
74 lines (59 loc) · 2.89 KB
/
test_atlas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Test suite for the Atlas similarity calculation functionality.

This test verifies that the main function correctly returns:
1. The most similar dataset from the atlas
2. Its corresponding configuration settings
3. The similarity score

The test ensures:
- Return value types are correct
- Similarity score is within valid range (0-1)
- Configuration dictionary contains exactly the expected cell type annotation methods, each mapped to a string
- The selected dataset and its score match the values recomputed from the similarity spreadsheet
"""
import json
import sys
import pandas as pd
from dance.settings import ATLASDIR, DANCEDIR, SIMILARITYDIR
sys.path.append(str(ATLASDIR))
from demos.main import main
from dance import logger
def test_main():
    """Check ``main`` against the precomputed similarity spreadsheet.

    Runs ``main`` on a sample Brain dataset and verifies:

    * the types and ranges of the three returned values,
    * that the returned configuration has exactly the expected method keys
      with string values,
    * that the selected dataset and its score equal the weighted similarity
      recomputed here from the Excel sheet and the stored weights JSON.

    The data files referenced under ``DANCEDIR`` and ``SIMILARITYDIR`` are
    read from disk by this test.
    """

    # Plain attribute container handed to ``main``.
    # NOTE(review): presumably mirrors the parsed CLI arguments main expects.
    class Args:
        tissue = "Brain"
        data_dir = str(DANCEDIR / "examples/tuning/temp_data/train/human")
        source_file = "human_Brain364348b4-bc34-4fe1-a851-60d99e36cafa_data"

    args = Args()
    # Args defines no __repr__, so formatting the object itself would only
    # print its type and memory address; log the field values instead.
    logger.info(f"testing main with args: tissue={args.tissue}, data_dir={args.data_dir}, "
                f"source_file={args.source_file}")

    # Sheet name inside the similarity workbook; it equals the first four
    # characters of the dataset id embedded in ``source_file``.
    source_id = "3643"
    tissue_key = args.tissue.lower()

    # Execute main function with test parameters
    ans_file, ans_conf, ans_value = main(args)

    # Verify return value types and ranges
    assert isinstance(ans_file, str), "ans_file should be a string type"
    assert isinstance(ans_value, float), "ans_value should be a float type"
    assert 0 <= ans_value <= 1, "Similarity value should be between 0 and 1"

    # Verify configuration dictionary structure and content
    expected_methods = ["cta_celltypist", "cta_scdeepsort", "cta_singlecellnet", "cta_actinn"]
    assert isinstance(ans_conf, dict), "ans_conf should be a dictionary type"
    assert set(ans_conf.keys()) == set(expected_methods), "ans_conf should contain all expected methods"
    assert all(isinstance(v, str) for v in ans_conf.values()), "All configuration values should be string type"

    # Load the per-tissue similarity table; the first column becomes the row
    # index, so rows are addressed by label below.
    data = pd.read_excel(SIMILARITYDIR / f"data/new_sim/{tissue_key}_similarity.xlsx", sheet_name=source_id,
                         index_col=0)

    # These flags only select which weights file is read (via its name prefix).
    reduce_error = False
    in_query = True
    prefix = ("reduce_error_" if reduce_error else "") + ("in_query_" if in_query else "")
    weights_path = SIMILARITYDIR / f"data/similarity_weights_results/{prefix}sim_dict.json"
    with open(weights_path, encoding="utf-8") as f:
        sim_dict = json.load(f)

    tissue_weights = sim_dict[tissue_key]
    feature_name = tissue_weights["feature_name"]
    w1 = tissue_weights["weight1"]
    w2 = 1 - w1  # the two weights sum to one

    # Recompute the combined similarity per column without adding a scratch
    # row to the loaded sheet.
    similarity = data.loc[feature_name] * w1 + data.loc["metadata_sim"] * w2
    expected_file = similarity.idxmax()
    expected_value = similarity[expected_file]

    # Verify result consistency with Excel
    assert abs(ans_value - expected_value) < 1e-4, "Calculated similarity value does not match Excel value"
    assert ans_file == expected_file, "Selected most similar dataset does not match Excel result"