Skip to content

Commit fe4ea83

Browse files
Merge pull request #2 from beliveau-lab/master
Added the probeDB utility version which includes strand info, removed…
2 parents f2b3d5c + 0f18db2 commit fe4ea83

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

utilities/probeDB.py

+23-20
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
A script to generate a database of refseq annotations with a row for each
99
probe that overlaps the region.
1010
11+
Changed on August 29, 2019 to include strand information for the probe.
12+
1113
"""
1214

1315
import argparse
@@ -17,17 +19,18 @@
1719
from Bio.Seq import Seq
1820
from Bio.Alphabet import IUPAC
1921
import pandas as pd
22+
import numpy as np
2023

2124
def check_polarity(row):
2225
""""Checks polarity and flips probe sequence if reference is +."""
23-
if row[14] == '+':
26+
if row[15] == '+':
2427
return str(Seq(row[3], IUPAC.unambiguous_dna).reverse_complement())
2528
else:
2629
return str(row[3])
2730

2831
def truncate_refseq(row):
2932
""""Truncates the Refseq column down to just accession."""
30-
version = row[12].split('_')[0] + '_' + row[12].split('_')[1]
33+
version = row[13].split('_')[0] + '_' + row[13].split('_')[1]
3134
accession = version.split('.')[0]
3235
return accession
3336

@@ -44,8 +47,6 @@ def getArgs(strInput=None):
4447
required=True, help="The name of the annotation file")
4548
parser.add_argument('-o', '--outputFile', action='store', type=str,
4649
required=False, help="The name for output file")
47-
parser.add_argument('-f', '--file', action='store_true', default=False,
48-
required=False, help="Run in file mode")
4950

5051
return parser.parse_args()
5152

@@ -65,22 +66,21 @@ def main():
6566
else:
6667
out_name = args.outputFile
6768

68-
if args.file:
69-
probes = []
70-
with open(args.folder) as probe_file:
69+
70+
# create a list of all files in directory
71+
files = glob.glob(folder + "/*")
72+
73+
# creates a list of all of the probes from each
74+
# probe file, each represented as a single string
75+
probes = []
76+
for f in files:
77+
with open(f) as probe_file:
7178
for line in probe_file:
7279
probes.append(line.strip())
73-
else:
74-
# create a list of all files in directory
75-
files = glob.glob(folder + "/*")
76-
77-
# creates a list of all of the probes from each
78-
# probe file, each represented as a single string
79-
probes = []
80-
for f in files:
81-
with open(f) as probe_file:
82-
for line in probe_file:
83-
probes.append(line.strip())
80+
81+
# add strand column to probes
82+
for i in range(0, len(probes), 1):
83+
probes[i] = probes[i] + '\t+'
8484

8585
# creates a bedtool object with the entire probe set for the assembly
8686
probe_bedtool = pybedtools.BedTool(probes)
@@ -111,11 +111,14 @@ def main():
111111
# check polarity of annotations, function flips probe sequence if necessary
112112
probes[3] = probes.apply(check_polarity, axis = 1)
113113

114+
# flip probe strand too
115+
probes[9] = np.where(probes[15] == '+', '-', '+')
116+
114117
# convert refseq column to just accession
115-
probes[12] = probes.apply(truncate_refseq, axis = 1)
118+
probes[13] = probes.apply(truncate_refseq, axis = 1)
116119

117120
# drop unnecessary columns
118-
probes.drop([9, 10, 11, 13, 14, 15],
121+
probes.drop([10, 11, 12, 14, 15, 16],
119122
axis = 1,
120123
inplace = True)
121124

0 commit comments

Comments
 (0)