-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBA2C.py
36 lines (31 loc) · 1.13 KB
/
BA2C.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Input layout: line 1 is the text, line 2 is k, and every remaining line is
# one row of the profile matrix written as whitespace-separated numbers.
with open("datasets/rosalind_ba2c.txt", "r") as datasetFile:
    text = datasetFile.readline().strip()
    k = int(datasetFile.readline().strip())
    # Build real lists rather than map objects: kmerProbability indexes
    # profile[row][col], which a lazy Python 3 map iterator does not support.
    # Blank lines (e.g. a trailing newline at end of file) are skipped so they
    # do not reach float().
    profile = [
        [float(value) for value in line.split()]
        for line in datasetFile
        if line.strip()
    ]
print("Find a Profile-most Probable k-mer in a String")
def kmerProbability(kmer, profile):
    """Return the probability of ``kmer`` under ``profile``.

    ``profile`` is indexed as ``profile[row][column]``: rows 0 to 3 are looked
    up for the bases A, C, G and T respectively, and column ``i`` is used for
    position ``i`` of the k-mer. The result is the product of the entries
    selected by each base of ``kmer``; an empty ``kmer`` yields 1.

    Raises:
        KeyError: if ``kmer`` contains a character other than A, C, G or T.
        IndexError: if ``profile`` has fewer rows or columns than are indexed.
    """
    baseToRow = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    product = 1
    # Multiply as we go, left to right, instead of collecting the individual
    # probabilities in a temporary list first.
    for column, base in enumerate(kmer):
        product = product * profile[baseToRow[base]][column]
    return product
def mostProbableKmer(text, k, profile):
    """Return the length-``k`` substring of ``text`` that scores highest.

    Every window ``text[i:i+k]`` for ``i`` in ``range(len(text) - k + 1)`` is
    scored with ``kmerProbability``. When several windows share the highest
    score, the one that occurs first in ``text`` is returned. If there are no
    windows to examine, the result is ``None``.
    """
    best_kmer = None
    best_score = None
    windows = (text[start:start + k] for start in range(len(text) - k + 1))
    for window in windows:
        score = kmerProbability(window, profile)
        # Only a strictly greater score displaces the current best, so the
        # earliest of several equally probable windows is kept.
        if best_score is not None and not score > best_score:
            continue
        best_score = score
        best_kmer = window
    return best_kmer
solution = mostProbableKmer(text, k, profile)
# NOTE: mostProbableKmer returns None when text has no window of length k; in
# that case write() below raises TypeError.
# The context manager flushes and closes the output file even if the write
# fails, instead of leaving the handle open until interpreter exit.
with open("output/rosalind_ba2c.txt", "w") as outputFile:
    outputFile.write(solution)