-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsequence_logos.py
executable file
·83 lines (60 loc) · 1.98 KB
/
sequence_logos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#! /usr/bin/env python3
'''
A script to generate sequence logos from motif data.
'''
import os
import re
import click
import pandas as pd
import weblogolib as wl
def generate_logo(seqfile, title):
    '''
    Generate the sequence logo from the specified sequences.

    The logo is written in EPS format alongside the input file, using the
    same path with its extension replaced by ``.eps``.

    Args:
        seqfile (str): The path to a sequence file.
        title (str): The title to set on the logo.
    '''
    with open(seqfile, 'r') as fh:
        # The length of the first line determines the position numbering
        # below; the file is then rewound so the parser sees every line.
        seqlen = len(fh.readline().rstrip('\n'))
        fh.seek(0)
        seqs = wl.read_seq_data(fh)

    data = wl.LogoData.from_seqs(seqs)

    options = wl.LogoOptions()
    options.title = title
    options.fineprint = ''
    # Start numbering at minus half the sequence length so that position 0
    # lands on (or just right of) the central column.
    options.first_index = -(seqlen // 2)

    form = wl.LogoFormat(data, options)
    eps = wl.eps_formatter(data, form)

    # Replace the real extension rather than blindly dropping the last four
    # characters, which mangles names whose extension is not 3 letters long.
    eps_file = os.path.splitext(seqfile)[0] + '.eps'
    with open(eps_file, 'wb') as out:
        out.write(eps)
def generate_logos(motifs, seqs, output_dir):
    '''
    Given the extracted motifs, generate sequence logos.

    For each motif, the sequences matching it (the pattern is anchored at
    the start of the sequence) are written one per line to
    ``motif_<motif>_motif.txt`` inside ``output_dir``, and that file is
    passed to ``generate_logo`` with the motif as the title.

    Args:
        motifs (pandas.DataFrame): Table with a ``motif`` column holding
            regular-expression patterns.
        seqs (list): The list of foreground sequences.
        output_dir (str): The directory to which to save logos. It is
            created, including missing parent directories, if needed.
    '''
    # makedirs handles nested paths and an already-existing directory in one
    # call, avoiding the check-then-create race of isdir() + mkdir().
    os.makedirs(output_dir, exist_ok=True)

    for _, row in motifs.iterrows():
        motif = row.loc['motif']
        regex = re.compile(motif)
        matches = [s for s in seqs if regex.match(s)]

        # NOTE(review): the raw pattern is embedded in the file name, so a
        # motif containing a path separator would escape output_dir.
        match_file = os.path.join(output_dir, 'motif_' + motif + '_motif.txt')
        with open(match_file, 'w') as fh:
            fh.write('\n'.join(matches))

        generate_logo(match_file, motif)
@click.command()
@click.argument('motif-file')
@click.argument('sequence-file')
@click.option('--output-dir', '-o',
              help='The destination directory for saving logos',
              default='logos')
def main(motif_file, sequence_file, output_dir):
    '''
    Generate a sequence logo for every motif listed in MOTIF_FILE.

    MOTIF_FILE is a CSV file with a "motif" column. SEQUENCE_FILE lists the
    sequences to search, one per line.
    '''
    with open(sequence_file) as fh:
        # Skip blank lines so they do not turn into empty sequences.
        seqs = [line.rstrip() for line in fh if line.strip()]

    generate_logos(pd.read_csv(motif_file), seqs, output_dir)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()