-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBirdsuite
65 lines (46 loc) · 2.68 KB
/
Birdsuite
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
####it's a known issue that mac couldn't keep auth keys after restart
ssh-add ~/.ssh/pdc*
rsync --progress /media/sf_share/*fam pdc_fan:/mnt/SCRATCH/0408/997BRCA_0416
##birdsuite, annotaion file was from https://www.thermofisher.com/us/en/home/life-science/microarray-analysis/microarray-data-analysis/genechip-array-annotation-files.html#/legacy=affymetrix.com
./make_bim.py --annotations=/mnt/SCRATCH/0408/GenomeWideSNP_6.na35.annot.csv --calls=/mnt/SCRATCH/0408/brca1---call.txt --output=/mnt/SCRATCH/0408/BRCA1-bim
./make_bed.py --calls=/mnt/SCRATCH/0408/brca1---call.txt --confs=/mnt/SCRATCH/0408/brca1---conf.txt --output=/mnt/SCRATCH/0408/BRCA1
##This option uses X chromosome data to determine sex (i.e. based on heterozygosity rates) and flags individuals for whom the reported sex in the PED file does not match the estimated sex (given genomic data). A PROBLEM arises if the two sexes do not match
##11 samples are mismatched. All 11 samples are reported male in TCGA.
##sort bim file by chromosome id
sort -n BRCA0416_2.bim > BRCA0416_02.bim
##make sure the chr id is in correct order
awk '{print $1}' BRCA0416_02.bim | uniq
##fam file should be ready as well, the following cmd will convert binary BED fileset back into a standard PED/MAP fileset
/mnt/SCRATCH/plink1.9/plink --bfile BRCA1 --recode --out BRCA_ped
##merge multiple PED/MAP filesets. Need to specify a single file contains a list of files.
##nano Allfile.txt, looks like below
BRCA0416_1_ped.ped BRCA0416_1_ped.map
BRCA0416_2_ped.ped BRCA0416_2_ped.map
BRCA0416_3_ped.ped BRCA0416_3_ped.map
BRCA0416_4_ped.ped BRCA0416_4_ped.map
##merge multiple PED/MAP filesets.It seems ped/map/bim/bed/fam should be all prepared ahead of time.
/mnt/SCRATCH/plink1.9/plink --file BRCA0416_1_ped --merge-list allfile.txt --recode --out testmerge
##convert ped/map file into a VCF file
plink --ped BRCA1_ped.ped --map BRCA1_ped.map --recode vcf --out BRCA_vcf
##zip vcf file
vcf-sort BRCA1.vcf | bgzip -c > BRCA1.vcf.gz
##creat index
tabix BRCA1.vcf.gz
##split VCF file by chromosome
for i in {0..26}; do tabix BRCA1.vcf.gz $i > chr$i.vcf; done
mv chr23.vcf chrX.vcf
mv chr24.vcf chrY.vcf
mv chr26.vcf chrM.vcf
##remove chr25 (PAR or XY), remove chr0 (undefined)
rm chr25.vcf
rm chr0.vcf
##replace 26 with chrM, 24 with chrY, 23 with chrX
sed -i 's/^26/M/g' chrM.vcf
sed -i 's/^24/Y/g' chrY.vcf
sed -i 's/^23/X/g' chrX.vcf
##add header to all splitted vcfs.
for i in {1..22} X Y M; do cat header chr$i.vcf >> new_chr$i.vcf; done
##Create a sorted *.vcf.gz file using VCFtools and tabix (including bgzip)
for i in new_*; do vcf-sort $i | bgzip -c > BRCA1_$i.gz; done
##save to local computer
rsync --progress pdc_fan:/mnt/SCRATCH/0408/BRCA1_chr* /media/sf_share/