forked from brantp/rtd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_quality_statistics.py
executable file
·70 lines (52 loc) · 1.79 KB
/
read_quality_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
import preprocess_radtag_lane
smartopen = preprocess_radtag_lane.smartopen
import os,sys,numpy
def get_fastq_properties(fq):
if smartopen(fq).read(1) == '@':
lnum = 4
else:
lnum = 1
print >> sys.stderr, 'fastq format lnum: %s' % lnum
baseQ = None
qfh = smartopen(fq)
while baseQ is None:
t,r,q = preprocess_radtag_lane.next_read_from_fh(qfh,lnum)
baseQ = preprocess_radtag_lane.get_baseQ(q)
qfh.close()
print >> sys.stderr, 'fastq format baseQ: %s' % baseQ
readlen = len(r)
print >> sys.stderr, 'fastq format readlen: %s' % readlen
return lnum,baseQ,readlen
if __name__ == "__main__":
if len(sys.argv) == 2:
fq = sys.argv[1]
boundstr = "0:"
else:
fq, boundstr = sys.argv[1:]
start,end = boundstr.split(':')
start = int(start)
lnum,baseQ,readlen = get_fastq_properties(fq)
if end == '':
end = readlen
readcount = preprocess_radtag_lane.get_read_count(fq)
qsc_n = 0
qsc_tot = numpy.zeros(readlen)
qsc_by_read = []
fh = smartopen(fq)
tickon = readcount/1000
for i in range(readcount):
if i % tickon == 0:
print >> sys.stderr, '\r%0.1f' % ((i/float(readcount)) * 100),
t,r,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
qsc = [ord(c)-baseQ for c in q]
qsc_n += 1
qsc_tot += qsc
qsc_by_read.append(numpy.mean(qsc[start:end]))
qsc_by_base = list(qsc_tot/qsc_n)
print >> sys.stderr, 'write per-base mean qual ...',
open(fq+'-per_base_qual.list','w').write(qsc_by_base.__repr__())
print >> sys.stderr, 'done'
print >> sys.stderr, 'write per-read qual ..',
open(fq+'-per_read_qual.list','w').write(qsc_by_read.__repr__())
print >> sys.stderr, 'done'