-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquantile_normalization.py
62 lines (51 loc) · 1.38 KB
/
quantile_normalization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
import pandas
#cancer = open("cluster_features/Normal_Early/Hierarchical_200_Normal_Early.csv","r")
# Quantile-normalise the numeric columns of "cat_transpose.csv".
#
# Input layout, as read by this script:
#   * the first line of the file is discarded;
#   * the second line is the comma-separated header;
#   * every following line is one record whose first three fields are
#     metadata (field 1 is the class label) and whose remaining fields are
#     numeric values.
# Fields may be wrapped in double quotes; fields are split on every comma, so
# quoted fields must not themselves contain commas.
#
# Outputs:
#   * meta.csv     - the three metadata columns, label collapsed to
#                    'Cancer' / 'Normal';
#   * quantile.csv - the quantile-normalised numeric matrix.

# Use a context manager so the input handle is closed even if parsing fails.
with open("cat_transpose.csv", "r") as handle:
    next(handle)  # skip the line that precedes the header
    cancer = handle.readlines()

# Header: raw names (possibly still quoted) and their unquoted counterparts.
colnames = cancer[0].strip("\n").split(",")
colu = [name.strip('"') for name in colnames]
cancer = cancer[1:]

cancer_main = []
for line in cancer:
    line = line.strip("\n")
    if not line.strip():
        # A blank line has no label field; skip it instead of crashing.
        continue
    fields = line.strip('"').split(",")
    # Unquote the label *before* comparing: a quoted "Cancer" would otherwise
    # never match and every record would be labelled 'Normal'.
    label = fields[1].strip('"')
    # Anything that is not exactly 'Cancer' is treated as 'Normal'.
    fields[1] = 'Cancer' if label == 'Cancer' else 'Normal'
    fields[0] = fields[0].strip('"')
    fields[2] = fields[2].strip('"')
    cancer_main.append(fields)

# Split each record into metadata (first three fields) and measurements.
cancer_x = [row[3:] for row in cancer_main]
cancer_meta = [row[:3] for row in cancer_main]
cancer_target = [row[1] for row in cancer_main]

# Numeric matrix: one row per record, one column per measurement.
cancer_met = [[float(value.strip('"')) for value in row] for row in cancer_x]

df = pandas.DataFrame(cancer_met)
df1 = pandas.DataFrame(cancer_meta)

# Quantile normalisation, column-wise (DataFrame.rank ranks within columns):
#   1. rank every value inside its column, breaking ties by order of
#      appearance ('first'), and average all values that share a rank;
#   2. replace every value by the average belonging to its rank, this time
#      using 'min' ranks so that tied values in a column get the same result.
rank_mean = df.stack().groupby(df.rank(method='first').stack().astype(int)).mean()
cancer_met = df.rank(method='min').stack().astype(int).map(rank_mean).unstack()

colmeta = colu[:3]
# NOTE(review): colval is not applied to the output, so quantile.csv is
# written with positional integer column labels rather than these names.
colval = colnames[3:]
df1.columns = colmeta

print(df1)
print(cancer_met)
df1.to_csv("meta.csv", index_label='Number')
cancer_met.to_csv("quantile.csv", index_label='Number')