-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_apoholo.py
160 lines (123 loc) · 9.82 KB
/
test_apoholo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import unittest
import prepare
import apoholo
import os
import sys
import shlex
from apoholo import load_precompiled_data, Query, QueryResult, parse_query
from common import get_default_workdir, read_file
# Working directory shared by every test in this module, obtained from
# common.get_default_workdir().
# TODO always run tests with clean temporary workdir e.g. 'tmp_test/workdir'
workdir = get_default_workdir()
def exists_not_empty(path):
    """
    Return True if `path` exists and its size is greater than zero bytes.

    Returns False when the path is missing or cannot be stat'ed.
    """
    # Single stat call (EAFP): avoids the window between a separate
    # exists() check and getsize() in which the file could disappear.
    # OSError/ValueError are the errors os.path.exists() itself swallows.
    try:
        return os.path.getsize(path) > 0
    except (OSError, ValueError):
        return False
def non_blank_lines(path):
    """
    Count the lines of a text file that contain something other than
    whitespace. A path that does not exist yields 0.
    """
    if not os.path.exists(path):
        return 0
    with open(path) as handle:
        # A line is "blank" when stripping whitespace leaves nothing.
        return sum(1 for text in handle if text.strip())
def count_files(dir, suffix):
    """
    Number of entries in directory `dir` whose name ends with `suffix`
    and which pass the exists_not_empty() check.
    """
    matching = 0
    for entry in os.listdir(dir):
        if not entry.endswith(suffix):
            continue
        if exists_not_empty(os.path.join(dir, entry)):
            matching += 1
    return matching
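# The T01 prefix makes this class sort (and therefore run) before T02_Apoholo:
# the default unittest loader visits the names in a module in sorted order.
# TODO find a better way to enforce test order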
class T01_Prepare(unittest.TestCase):
    """Checks that the prepare script succeeds and leaves non-empty SIFTS files in workdir."""

    def test_prepare_script(self):
        # prepare.main is expected to report success with exit code 0.
        self.assertEqual(0, prepare.main([]))

        # Checks for the pdb_chain_uniprot_dict / pdb_chain_uniprot_REVERSE_SPnum
        # (.txt and .bin) files are currently disabled.
        expected_outputs = (
            '/SIFTS/uniprot_segments_observed_dict.txt',
            '/SIFTS/uniprot_segments_observed_REVERSE_SPnum.txt',
            '/SIFTS/uniprot_segments_observed_dict.bin',
            '/SIFTS/uniprot_segments_observed_REVERSE_SPnum.bin',
        )
        for relative_path in expected_outputs:
            self.assertTrue(exists_not_empty(workdir + relative_path))
class T02_Apoholo(unittest.TestCase):
    """
    End-to-end tests of apoholo query parsing and query processing.

    All queries run against the module-level `workdir`. Checks use the
    unittest assert* methods (rather than bare `assert` statements) so they
    are not stripped when Python runs with -O.
    """

    def setUp(self):
        # Runs before every test method: load the precompiled data from workdir.
        self.precompiled_data = load_precompiled_data(workdir)

    # Test successful query
    def tst_query(self, args_str, expect_apo=0, expect_holo=0) -> QueryResult:
        """
        Parse `args_str` as a command line, process the query and check the results.

        args_str:    command-line string; split with shlex so quoted parts stay together
        expect_apo:  expected number of APO chains; 0 skips the APO checks
        expect_holo: expected number of HOLO chains; 0 skips the HOLO checks

        Returns the QueryResult produced by apoholo.process_query.
        """
        argv = shlex.split(args_str)  # split but preserve '...' as substrings
        print("Testing with args:", argv)
        args = apoholo.parse_args(argv)
        res = apoholo.process_query(args.query, workdir, args, self.precompiled_data)
        print("Query result:", res)

        # NOTE(review): the chain-count checks tolerate extra results (>=) while the
        # CSV checks require an exact line count (==) - confirm the asymmetry is intended.
        if expect_apo > 0:
            self.assertGreaterEqual(res.num_apo_chains, expect_apo, f"Found less APO chains ({res.num_apo_chains} vs {expect_apo})")  # may find more due to future data
            # 1 + expected: presumably one header line plus one line per chain - TODO confirm
            self.assertEqual(non_blank_lines(res.result_dir + '/results_apo.csv'), 1 + expect_apo, "Bad results_apo.csv")
        if expect_holo > 0:
            self.assertGreaterEqual(res.num_holo_chains, expect_holo, f"Found less HOLO chains ({res.num_holo_chains} vs {expect_holo})")  # may find more due to future data
            self.assertEqual(non_blank_lines(res.result_dir + '/results_holo.csv'), 1 + expect_holo, "Bad results_holo.csv")

        # Test produced structure files
        if expect_apo + expect_holo > 0:
            # the extra 1 presumably accounts for the query structure itself - TODO confirm
            self.assertGreaterEqual(count_files(res.result_dir + '/structure_files', '.cif.gz'), 1 + expect_apo + expect_holo, "Failed to produce right number of .cif.gz files")

        return res

    # TODO instead of running main, test by running subprocess and capture stdout/stderr, test for presence of expected error messages
    def tst_main_fail(self, args_str):
        """
        Run apoholo.main with the command line `args_str` and require that it raises.

        BaseException is expected (not just Exception), presumably so that a
        SystemExit raised on invalid arguments also counts as a failure.
        """
        argv = shlex.split(args_str)  # split but preserve '...' as substrings
        print("Testing with args:", argv)
        with self.assertRaises(BaseException):
            apoholo.main(argv)

    def test_parse_query(self):
        self.assertEqual(
            parse_query('1a73', autodetect_lig=True, water_as_ligand_auto=False, nonstd_rsds_as_lig_auto=False, d_aa_as_lig_auto=False),
            Query(struct='1a73', chains='ALL', ligands=None, position=None, autodetect_lig=1, water_as_ligand_auto=0, nonstd_rsds_as_lig_auto=0, d_aa_as_lig_auto=0),
        )
        self.assertEqual(
            parse_query('1a73 A,B ZN', autodetect_lig=False, water_as_ligand_auto=False, nonstd_rsds_as_lig_auto=False, d_aa_as_lig_auto=False),
            Query(struct='1a73', chains='A,B', ligands='ZN', position=None, autodetect_lig=0, water_as_ligand_auto=0, nonstd_rsds_as_lig_auto=0, d_aa_as_lig_auto=0),
        )
        self.assertEqual(
            parse_query('1a73 ALL ZN,MG', autodetect_lig=False, water_as_ligand_auto=False, nonstd_rsds_as_lig_auto=False, d_aa_as_lig_auto=False),
            Query(struct='1a73', chains='ALL', ligands='ZN,MG', position=None, autodetect_lig=0, water_as_ligand_auto=0, nonstd_rsds_as_lig_auto=0, d_aa_as_lig_auto=0),
        )
        self.assertEqual(
            parse_query('1a73 A', autodetect_lig=True, water_as_ligand_auto=False, nonstd_rsds_as_lig_auto=False, d_aa_as_lig_auto=False),
            Query(struct='1a73', chains='A', ligands=None, position=None, autodetect_lig=1, water_as_ligand_auto=0, nonstd_rsds_as_lig_auto=0, d_aa_as_lig_auto=0),
        )

    def test_simple_queries(self):
        self.tst_query("--query '1a73 A,B ZN' ", expect_apo=0, expect_holo=32)
        self.tst_query("--query '1a73 ALL ZN' ", expect_apo=0, expect_holo=32)
        self.tst_query("--query '1a73 * ZN' ", expect_apo=0, expect_holo=32)
        self.tst_query("--query '1a73' ", expect_apo=0, expect_holo=32)
        self.tst_query("--query '1a73 A' ", expect_apo=0, expect_holo=16)
        self.tst_query("--query '1a73 A ZN,MG' ", expect_apo=0, expect_holo=16)

    def test_advanced_queries(self):
        self.tst_query("--query '3CQV A HEM' ", expect_apo=6, expect_holo=5)
        self.tst_query("--query '3fav all zn' ", expect_apo=2, expect_holo=0)
        self.tst_query("--query '2hka all c3s' ", expect_apo=2, expect_holo=0)  # bovine NPC2 complex with cholesterol sulfate
        self.tst_query("--query '2v57 A,C prl' ", expect_apo=4, expect_holo=0)  # SS changes in transcriptional regulator LfrR in complex with proflavine
        self.tst_query("--query '1fmk A HOH 1011' ", expect_apo=18, expect_holo=3)  # (hard target) Water molecule in the interface of two domains that undergo extensive conformational changes upon ligand binding (v0.4.6 apo 25)
        self.tst_query("--query '1aro P HG 904' ", expect_apo=45, expect_holo=0)  # (previously apo 19-21) fragmented UniProt candidates, to use for testing UNP overlap calculation
        self.tst_query("--query '1pkz A tyr 9' --water_as_ligand 1", expect_apo=9, expect_holo=88)  # marian example, allosteric effects of HOH

    def test_advanced_queries_B(self):
        self.tst_query("--query '1a37 P sep 259' ", expect_apo=6, expect_holo=4)  # Phosphoserine on non-uniprot chain (hard target)
        self.tst_query("--query '1a37 P sep' ", expect_apo=6, expect_holo=4)  # same without specifying position
        self.tst_query("--query '1gb1' ", expect_apo=22, expect_holo=16)  # v0.4.6 apo 20, holo 16. Apo NMR structure with solid state NMR candidates (threw error in previous versions)

    def test_broad_search(self):
        self.tst_query("--query '2v0v' ", expect_apo=8, expect_holo=24)  # test for reverse search (this is a fully apo structure)
        self.tst_query("--query '2v0v A,B' ", expect_apo=4, expect_holo=12)

    def test_interface_ligands_search(self):
        self.tst_query("--query '1a73 E mg 205' ", expect_apo=4, expect_holo=12)  # non-protein query chain
        self.tst_query("--query '1a73 A mg' ", expect_apo=4, expect_holo=12)  # interface ligand assigned nucleic acid chain in the PDB file
        self.tst_query("--query '6XBY A adp,mg' ", expect_apo=7, expect_holo=2)  # ADP is an interface ligand on non-query chain
        self.tst_query("--query '3k0v B NAG 1' ", expect_apo=2, expect_holo=86)  # ligand is on a non-uniprot chain that is remapped, it was not detected in previous versions (< 0.4.9)

    def test_lig_bndng_chains_parsing(self):
        self.tst_query("--query '1a73 ! MG' ", expect_apo=8, expect_holo=24)  # "!" means that only query chains that bind the ligand(s) will be considered for the search
        self.tst_query("--query '3vro ! PTR' ", expect_apo=7, expect_holo=0)
        self.tst_query("--query '3n7y ! PTR' ", expect_apo=21, expect_holo=270)  # local-only error (python 3.9.9): total number of cif files is less than total apo + holo results [case-insensitivity in Windows overwrites upper/lower case chains resulting in less cif files (e.g, 5cdwC, 5cdwc)]
        self.tst_query("--query '1a73 ! MG,ZN' ", expect_apo=0, expect_holo=32)

    def test_expected_failures(self):
        self.tst_main_fail("--invalid_param ")
        self.tst_main_fail("--query 'INVALID_QUERY X X X' ")
        self.tst_main_fail("--query 'XXXX' ")  # XXXX is not in the PDB
        self.tst_main_fail("--query '1a73 \n XXXX' ")  # should fail if at least one is not valid
        self.tst_main_fail("--query '1apm E sep,ser' ")  # standard residue without positional argument, should fail
        self.tst_main_fail("--query '5aep ! hem' ")  # "!" operator with ligand that does not bind any query chain
        # TODO add more test cases

    def assert_progress(self, res, expected_content=None):
        """
        Check that `res.result_dir` contains a one-line '.progress' file whose
        content (as returned by read_file) equals `expected_content`.
        """
        progress_file = res.result_dir + '/.progress'
        self.assertTrue(os.path.exists(progress_file), ".progress file was not created")
        self.assertEqual(non_blank_lines(progress_file), 1, ".progress doesn't have exactly one line")
        content = read_file(progress_file)
        self.assertEqual(content, expected_content, f".progress file has invalid value. actual: '{content}' expected: '{expected_content}'")

    def test_track_progress(self):
        res = self.tst_query("--query '3CQV A HEM' --track_progress 1", expect_apo=6, expect_holo=5)
        self.assert_progress(res, expected_content="11/11")
        res = self.tst_query("--query '3fav all zn' --track_progress 1", expect_apo=2, expect_holo=0)
        self.assert_progress(res, expected_content="2/2")  # expected content changed from "4/4"
if __name__ == '__main__':
    # unittest.main() terminates the interpreter itself with the status of the
    # test run (its `exit` parameter defaults to True), so no explicit
    # sys.exit() wrapper is needed.
    unittest.main()