# on_target_score_calculator.py
'''
Calculates the on-target score for an sgRNA
Input: 30mer
Output: On-target score
Run as: python on_target_score_calculator.py <30mer>
'''
import sys,math
import numpy as np
# Coefficients of the logistic scoring model used by calc_score().
# NOTE(review): the values are reproduced verbatim from the original script;
# their provenance (presumably a published sgRNA activity model) should be
# confirmed before any of them is changed.

# Required input length, in nucleotides.
_SEQ_LENGTH = 30

# Model intercept: the starting value of the linear score.
_INTERCEPT = 0.597636154

# Penalty per G/C below (low) or above (high) 10 within the 20-mer s[4:24].
_GC_LOW_WEIGHT = -0.202625894
_GC_HIGH_WEIGHT = -0.166587752

# Single-nucleotide weights, keyed by nucleotide + 1-based position.
_SINGLE_NUC_WEIGHTS = {
    'G2': -0.275377128,
    'A3': -0.323887456, 'C3': 0.172128871,
    'C4': -0.100666209,
    'C5': -0.20180294, 'G5': 0.245956633,
    'A6': 0.036440041, 'C6': 0.098376835,
    'C7': -0.741181291, 'G7': -0.393264397,
    'A12': -0.466099015,
    'A15': 0.085376945, 'C15': -0.013813972,
    'A16': 0.272620512, 'C16': -0.119022648, 'T16': -0.285944222,
    'A17': 0.097454592, 'G17': -0.17554617,
    'C18': -0.345795451, 'G18': -0.678096426,
    'A19': 0.22508903, 'C19': -0.507794051,
    'G20': -0.417373597, 'T20': -0.054306959,
    'G21': 0.379899366, 'T21': -0.090712644,
    'C22': 0.057823319, 'T22': -0.530567296,
    'T23': -0.877007428,
    'C24': -0.876235846, 'G24': 0.278916259, 'T24': -0.403102218,
    'A25': -0.077300704, 'C25': 0.287935617, 'T25': -0.221637217,
    'G28': -0.689016682, 'T28': 0.117877577,
    'C29': -0.160445304,
    'G30': 0.386342585,
}

# Dinucleotide weights, keyed by the two adjacent nucleotides + the 1-based
# position of the first one.
_DINUC_WEIGHTS = {
    'GT2': -0.625778696,
    'GC5': 0.300043317,
    'AA6': -0.834836245, 'TA6': 0.760627772,
    'GG7': -0.490816749,
    'GG12': -1.516907439, 'TA12': 0.7092612, 'TC12': 0.496298609,
    'TT12': -0.586873894,
    'GG13': -0.334563735,
    'GA14': 0.76384993, 'GC14': -0.53702517,
    'TG17': -0.798146133,
    'GG19': -0.66680873, 'TC19': 0.353183252,
    'CC20': 0.748072092, 'TG20': -0.367266772,
    'AC21': 0.568209132, 'CG21': 0.329072074, 'GA21': -0.836456755,
    'GG21': -0.782207584,
    'TC22': -1.029692957,
    'CG23': 0.856197823, 'CT23': -0.463207679,
    'AA24': -0.579492389, 'AG24': 0.649075537,
    'AG25': -0.077300704, 'CG25': 0.287935617, 'TG25': -0.221637217,
    'GT27': 0.117877577,
    'GG29': -0.697740024,
}


def calc_score(s):
    """Return the on-target score of a 30-nt sgRNA context sequence.

    The score is a logistic function of a linear model: an intercept, a
    penalty proportional to how far the G/C count of the 20-mer at 0-based
    positions 4..23 deviates from 10, plus position-specific single- and
    di-nucleotide weights over the whole 30mer.

    Args:
        s: The 30mer, as a string (or any iterable of 1-character strings).
            Case and surrounding whitespace are ignored.  Characters other
            than A/C/G/T are accepted but contribute no weight.

    Returns:
        A float strictly between 0 and 1.

    Raises:
        ValueError: If the sequence is not exactly 30 nucleotides long.
    """
    # ''.join keeps list-of-characters input working, as the original did;
    # upper-casing prevents lower-case input from silently matching no weight.
    seq = ''.join(s).strip().upper()
    if len(seq) != _SEQ_LENGTH:
        raise ValueError(
            'expected a %d-nt sequence, got %d nt: %r'
            % (_SEQ_LENGTH, len(seq), s))

    score = _INTERCEPT

    # GC-content term: only the deviation from 10 G/C in the 20-mer counts.
    core = seq[4:24]
    gc_count = core.count('G') + core.count('C')
    if gc_count < 10:
        score += (10 - gc_count) * _GC_LOW_WEIGHT
    elif gc_count > 10:
        score += (gc_count - 10) * _GC_HIGH_WEIGHT

    last = len(seq) - 1
    for i, nuc in enumerate(seq):
        pos = str(i + 1)  # weight tables are keyed by 1-based position
        score += _SINGLE_NUC_WEIGHTS.get(nuc + pos, 0)
        # The final nucleotide has no right-hand neighbour to pair with.
        if i < last:
            score += _DINUC_WEIGHTS.get(nuc + seq[i + 1] + pos, 0)

    # Logistic transform of the linear score.
    return 1.0 / (1.0 + math.exp(-score))
if __name__ == '__main__':
    # Command-line entry point: score the 30mer given as the first argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit('Usage: python on_target_score_calculator.py <30mer>')
    sgrna = sys.argv[1]
    score = calc_score(sgrna)
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('sgRNA On-target score: ' + str(score))