Skip to content

Commit 0f18db2

Browse files
Added the probeDB utility version that includes strand info; removed the previous version that had the extra command-line option for working with files
1 parent f2b3d5c commit 0f18db2

File tree

1 file changed

+23
-20
lines changed

1 file changed

+23
-20
lines changed

utilities/probeDB.py

+23-20
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
A script to generate a database of refseq annotations with a row for each
99
probe that overlaps the region.
1010
11+
Changed on August 29, 2019 to include strand information for the probe.
12+
1113
"""
1214

1315
import argparse
@@ -17,17 +19,18 @@
1719
from Bio.Seq import Seq
1820
from Bio.Alphabet import IUPAC
1921
import pandas as pd
22+
import numpy as np
2023

2124
def check_polarity(row):
2225
"""Checks polarity and flips probe sequence if reference is +."""
23-
if row[14] == '+':
26+
if row[15] == '+':
2427
return str(Seq(row[3], IUPAC.unambiguous_dna).reverse_complement())
2528
else:
2629
return str(row[3])
2730

2831
def truncate_refseq(row):
2932
"""Truncates the Refseq column down to just accession."""
30-
version = row[12].split('_')[0] + '_' + row[12].split('_')[1]
33+
version = row[13].split('_')[0] + '_' + row[13].split('_')[1]
3134
accession = version.split('.')[0]
3235
return accession
3336

@@ -44,8 +47,6 @@ def getArgs(strInput=None):
4447
required=True, help="The name of the annotation file")
4548
parser.add_argument('-o', '--outputFile', action='store', type=str,
4649
required=False, help="The name for output file")
47-
parser.add_argument('-f', '--file', action='store_true', default=False,
48-
required=False, help="Run in file mode")
4950

5051
return parser.parse_args()
5152

@@ -65,22 +66,21 @@ def main():
6566
else:
6667
out_name = args.outputFile
6768

68-
if args.file:
69-
probes = []
70-
with open(args.folder) as probe_file:
69+
70+
# create a list of all files in directory
71+
files = glob.glob(folder + "/*")
72+
73+
# creates a list of all of the probes from each
74+
# probe file, each represented as a single string
75+
probes = []
76+
for f in files:
77+
with open(f) as probe_file:
7178
for line in probe_file:
7279
probes.append(line.strip())
73-
else:
74-
# create a list of all files in directory
75-
files = glob.glob(folder + "/*")
76-
77-
# creates a list of all of the probes from each
78-
# probe file, each represented as a single string
79-
probes = []
80-
for f in files:
81-
with open(f) as probe_file:
82-
for line in probe_file:
83-
probes.append(line.strip())
80+
81+
# add strand column to probes
82+
for i in range(0, len(probes), 1):
83+
probes[i] = probes[i] + '\t+'
8484

8585
# creates a bedtool object with the entire probe set for the assembly
8686
probe_bedtool = pybedtools.BedTool(probes)
@@ -111,11 +111,14 @@ def main():
111111
# check polarity of annotations, function flips probe sequence if necessary
112112
probes[3] = probes.apply(check_polarity, axis = 1)
113113

114+
# flip probe strand too
115+
probes[9] = np.where(probes[15] == '+', '-', '+')
116+
114117
# convert refseq column to just accession
115-
probes[12] = probes.apply(truncate_refseq, axis = 1)
118+
probes[13] = probes.apply(truncate_refseq, axis = 1)
116119

117120
# drop unnecessary columns
118-
probes.drop([9, 10, 11, 13, 14, 15],
121+
probes.drop([10, 11, 12, 14, 15, 16],
119122
axis = 1,
120123
inplace = True)
121124

0 commit comments

Comments
 (0)