forked from wushyer/RGAAT_v2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblat_smp.pl
144 lines (116 loc) · 2.96 KB
/
blat_smp.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/perl
#run blat with Multi CPUs
#author: gaoshh@big.ac.cn wushy@big.ac.cn
use File::Spec::Functions qw/rel2abs/;
use File::Basename;
use Getopt::Long;
use threads;
use Bio::Seq;
use Bio::SeqIO;
#get options
my %opts;
GetOptions(\%opts,
"h",
"q:s","t:s","o:s",
"w:s","p:s"
);
if ((!$opts{q})||(!$opts{t})||(!$opts{o})||($opts{h}))
{
print "
usage: $0 -h -q <query> -t <target> -o output_psl -p num_cpus -- other BLAT params
Main options:
-h show the help info
-q <query> BLAT query
-t <target> BLAT target
-o output_psl the output file
-w work_path specify the path where the script works
-p cpu how many parts to split(default 4)
-- other BLAT params could be passed in with --, see BLAT manual for more info
";
exit(1);
}
#settings
#initialize parameters
($QUERY,$TARGET,$OUTPUT,$WORKDIR,$CPUS)=@opts{'q','t','o','w','p'};
#wrap BLAT params
$PARAMS=join " ",@ARGV;
#default
$WORKDIR = ($WORKDIR)?$WORKDIR:rel2abs("./"); ##current dir as workdir
$CPUS = ($CPUS > 0)? $CPUS : 4; #use 4 cpus
#check parameters and environment
$QUERY=rel2abs($QUERY);
$TARGET=rel2abs($TARGET);
unless (-e $QUERY) {die "The QUERY file doesn't exist or unreadable: $QUERY\n";}
unless (-e $TARGET) {die "The TARGET file doesn't exist or unreadable: $TARGET\n";}
if (system("which blat")){
die "Can't find blat in your system\n";
}
$BLAT = `which blat`;
chomp($BLAT);
unless (-e $WORKDIR) {mkdir $WORKDIR;}
$WORKDIR=rel2abs($WORKDIR);
#map{mkdir "$WORKDIR/$_";}('split','worker');
#link file
system("ln -sf $QUERY $WORKDIR/query.fa");
system("ln -sf $TARGET $WORKDIR/target.fa");
#split input fasta
splitter("$WORKDIR/query.fa",$CPUS) || die "Split input fasta error\n";
#generate shell command
my @cmd;
for $i(1..$CPUS){
$cmd="$BLAT -noHead $WORKDIR/target.fa $WORKDIR/query.fa.split.$i $WORKDIR/output.split.$i $PARAMS > $WORKDIR/log.$i\n";
push @cmd,$cmd;
}
#wrap
my @threads;
for $i(1..$CPUS){
$cmd = shift @cmd;
$thread = threads->create(\&wrapper,$cmd);
$threads[$i] = $thread;
}
#recycle
my $failcount;
my $ret;
for $i(1..$CPUS){
if ($ret=$threads[$i]->join()){
$failcount++;
print "Thread $i exited with non-zero $ret, your result may not be complete, suggest retry...\n";
}
}
#cat output
die "Some parts of the work end up with error, please check and retry!\n" if ($failcount);
$cmd="cat ";
for $i(1..$CPUS){
$cmd.="$WORKDIR/output.split.$i ";
}
$cmd.="> $OUTPUT";
system $cmd;
print "Blat finished! Your result could be found: $OUTPUT\n";
#thread
sub wrapper{
#in: cmd
#out: retcode
my $cmd = shift @_;
print "start CMD: $cmd\n";
return system($cmd);
}
sub splitter{
my $fasta=shift @_;
my $parts=shift @_;
for $n(1..$parts){
open $n,">","$fasta.split.$n" || die;
}
open FASTA,"<",$fasta || die;
my $l=0;
while(<FASTA>){
if (/^>/){
$l++;
$n=$l%$parts;$n=$parts if ($n==0);
}
print $n $_;
}
for $n(1..$parts){
close OUT;
}
close FASTA;
}