Skip to content

Commit

Permalink
more minor bug fixes, moving towards v0.10.3
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Sep 6, 2017
1 parent cc871a0 commit 59b7222
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 23 deletions.
29 changes: 8 additions & 21 deletions bin/amptk-extract_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,12 @@ def dereplicate(input, output):
seqs[sequence] = rec.description
else:
#check length of taxonomy string, keep one with more tax info
newTax = rec.description.split(',')
oldTax = seqs.get(sequence).split(',')
newHeader = rec.description.split(';tax=')
oldHeader = seqs.get(sequence).split(';tax=')
newTax = newHeader[-1].split(',')
oldTax = oldHeader[-1].split(',')
newID = newHeader[0]
oldID = oldHeader[0]
newTaxLen = len(newTax)
oldTaxLen = len(oldTax)
if newTaxLen > oldTaxLen:
Expand All @@ -102,7 +106,7 @@ def dereplicate(input, output):
if newTax[-num] == oldTax[-num]:
lca = num-1
break
consensusTax = ','.join(oldTax[:-lca])
consensusTax = oldID+';tax='+','.join(oldTax[:-lca])
amptklib.log.debug("setting taxonomy to %s" % (consensusTax))
seqs[sequence] = consensusTax
#now write to file
Expand Down Expand Up @@ -314,22 +318,6 @@ def stripPrimer(records):
yield rec

def makeDB(input):
#need usearch for this, test to make sure version is ok with utax
usearch = args.usearch
try:
usearch_test = subprocess.Popen([usearch, '-version'], stdout=subprocess.PIPE).communicate()[0].rstrip()
except OSError:
amptklib.log.error("%s not found in your PATH, exiting." % usearch)
os._exit(1)
version = usearch_test.split(" v")[1]
majorV = version.split(".")[0]
minorV = version.split(".")[1]
if int(majorV) < 8 or (int(majorV) >= 8 and int(minorV) < 1):
amptklib.log.warning("USEARCH version: %s detected you need v8.1.1756 or above" % usearch_test)
os._exit(1)
else:
amptklib.log.info("USEARCH version: %s" % usearch_test)

db_details = args.out + '.udb.txt'
usearch_db = args.out + '.udb'
if args.trimming:
Expand All @@ -340,7 +328,6 @@ def makeDB(input):
details.write(db_string)
report = args.out + '.report.txt'


if args.create_db == 'utax':
#create log file for this to troubleshoot
utax_log = args.out + '.utax.log'
Expand Down Expand Up @@ -465,7 +452,7 @@ def worker(input):
if args.derep_fulllength:
Passed = amptklib.countfasta(OutName)
amptklib.log.info('{0:,}'.format(Passed) + ' records passed (%.2f%%)' % (Passed*100.0/SeqCount))
amptklib.log.info("Now dereplicating sequences (remove if sequence and header identical)")
amptklib.log.info("Now dereplicating sequences (collapsing identical sequences)")
derep_tmp = args.out + '.derep.extracted.fa'
os.rename(OutName, derep_tmp)
dereplicate(derep_tmp, OutName)
Expand Down
3 changes: 3 additions & 0 deletions bin/amptk-filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,9 @@ def __init__(self,prog):
#first calculate bleed out of mock community
#slice normalized dataframe to get only mock OTUs from table
mock_df = pd.DataFrame(norm_round, index=mock)
#if there are samples to drop, make sure they aren't being used in this calculation
if args.drop:
mock_df.drop(args.drop, axis=1, inplace=True)
#get total number of reads from mock OTUs from entire table
total = np.sum(np.sum(mock_df,axis=None))
#now drop the mock barcode sample
Expand Down
10 changes: 9 additions & 1 deletion bin/amptk-process_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,15 @@ def processRead(input):
outputSeqFile.close()
inputSeqFile.close()
else:
shutil.copyfile(args.barcode_fasta, barcode_file)
#check for multi_samples and add if necessary
if args.multi == 'False':
shutil.copyfile(args.barcode_fasta, barcode_file)
else:
with open(barcode_file, 'w') as barcodeout:
with open(args.barcode_fasta, 'rU') as input:
for rec in SeqIO.parse(input, 'fasta'):
outname = args.multi+'.'+rec.id
barcodeout.write(">%s\n%s\n" % (outname, rec.seq))

#parse primers here so doesn't conflict with mapping primers
#look up primer db otherwise default to entry
Expand Down
2 changes: 1 addition & 1 deletion lib/amptklib.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

ASCII = {'!':'0','"':'1','#':'2','$':'3','%':'4','&':'5',"'":'6','(':'7',')':'8','*':'9','+':'10',',':'11','-':'12','.':'13','/':'14','0':'15','1':'16','2':'17','3':'18','4':'19','5':'20','6':'21','7':'22','8':'23','9':'24',':':'25',';':'26','<':'27','=':'28','>':'29','?':'30','@':'31','A':'32','B':'33','C':'34','D':'35','E':'36','F':'37','G':'38','H':'39','I':'40','J':'41','K':'42','L':'43','M':'44','N':'45','O':'46','P':'47','Q':'48','R':'49','S':'50'}

primer_db = {'fITS7': 'GTGARTCATCGAATCTTTG', 'ITS4': 'TCCTCCGCTTATTGATATGC', 'ITS1-F': 'CTTGGTCATTTAGAGGAAGTAA', 'ITS2': 'GCTGCGTTCTTCATCGATGC', 'ITS3': 'GCATCGATGAAGAACGCAGC', 'ITS4-B': 'CAGGAGACTTGTACACGGTCCAG', 'ITS1': 'TCCGTAGGTGAACCTGCGG', 'LR0R': 'ACCCGCTGAACTTAAGC', 'LR2R': 'AAGAACTTTGAAAAGAG', 'JH-LS-369rc': 'CTTCCCTTTCAACAATTTCAC', '16S_V3': 'CCTACGGGNGGCWGCAG', '16S_V4': 'GACTACHVGGGTATCTAATCC', 'ITS3_KYO2': 'GATGAAGAACGYAGYRAA', 'COI-F': 'GGTCAACAAATCATAAAGATATTGG', 'COI-R': 'GGWACTAATCAATTTCCAAATCC', '515FB': 'GTGYCAGCMGCCGCGGTAA', '806RB': 'GGACTACNVGGGTWTCTAAT'}
primer_db = {'fITS7': 'GTGARTCATCGAATCTTTG', 'ITS4': 'TCCTCCGCTTATTGATATGC', 'ITS1-F': 'CTTGGTCATTTAGAGGAAGTAA', 'ITS2': 'GCTGCGTTCTTCATCGATGC', 'ITS3': 'GCATCGATGAAGAACGCAGC', 'ITS4-B': 'CAGGAGACTTGTACACGGTCCAG', 'ITS1': 'TCCGTAGGTGAACCTGCGG', 'LR0R': 'ACCCGCTGAACTTAAGC', 'LR2R': 'AAGAACTTTGAAAAGAG', 'JH-LS-369rc': 'CTTCCCTTTCAACAATTTCAC', '16S_V3': 'CCTACGGGNGGCWGCAG', '16S_V4': 'GACTACHVGGGTATCTAATCC', 'ITS3_KYO2': 'GATGAAGAACGYAGYRAA', 'COI-F': 'GGTCAACAAATCATAAAGATATTGG', 'COI-R': 'GGWACTAATCAATTTCCAAATCC', '515FB': 'GTGYCAGCMGCCGCGGTAA', '806RB': 'GGACTACNVGGGTWTCTAAT', 'ITS4-B21': 'CAGGAGACTTGTACACGGTCC'}


degenNuc = [("R", "A"), ("R", "G"),
Expand Down

0 comments on commit 59b7222

Please sign in to comment.