-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path09_output_comparison_full.R
169 lines (157 loc) · 11.3 KB
/
09_output_comparison_full.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
library(microbenchmark)
library(parallel)
library(data.table)
source("./02_utility_functions.R")
metadata = readRDS("metadata.RDS")
# metadata = metadata[sapply(metadata, function(x) x$type) != "TMT"]
# Additional options to check:
# Remove 1 feature proteins
# Do not remove few measurements
get_comparison_df = function(path) {
all_files = list.files(path, full.names = TRUE)
files = data.table(file = all_files)
files[, dataset := gsub("v[0-9].RDS", "", file)]
files[, v := stringr::str_extract(file, "v[0-9]")]
splitted = split(files, files$dataset)
stats_lf = rbindlist(lapply(splitted, function(test) {
cbind(dataset = unique(gsub(paste0(path, "/"), "", test$dataset)),
getStats(readRDS(test[v == "v3", file]),
readRDS(test[v == "v4", file])))
}))
stats_lf
}
problems = list()
save_single_output = function(metadata, output_path = "./single_outputs/", remove_single_feature = FALSE, remove_few = TRUE) {
remove_few_lf = ifelse(remove_few, "remove", "keep")
for (dataset in metadata) {
tryCatch({
names = names(dataset)
dataset = lapply(dataset, function(x) gsub("./datasets/", "", x))
names(dataset) = names
if (dataset$tool == "MaxQuant") {
evidence = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$evidence_path)))
protein_groups = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$protein_groups)))
annotation = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$annotation)))
if (dataset$type == "TMT") {
v3 = MSstatsTMT::MaxQtoMSstatsTMTFormat(evidence, protein_groups, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
v4 = MSstatsConvert::MaxQtoMSstatsTMTFormat(evidence, protein_groups, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
} else {
v3 = MSstats::MaxQtoMSstatsFormat(evidence, annotation, protein_groups,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::MaxQtoMSstatsFormat(evidence, annotation, protein_groups,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
}
} else if (dataset$tool == "DIAUmpire") {
fragments = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$fragment_path)))
peptides = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$peptide_path)))
proteins = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$protein_path)))
annotation = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$annotation)))
v3 = MSstats::DIAUmpiretoMSstatsFormat(fragments, peptides, proteins, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::DIAUmpiretoMSstatsFormat(fragments, peptides, proteins, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
} else if (dataset$tool == "OpenMS") {
input = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$input_path)))
if (dataset$type == "TMT") {
v3 = MSstatsTMT::OpenMStoMSstatsTMTFormat(input,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
v4 = MSstatsConvert::OpenMStoMSstatsTMTFormat(input,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
} else {
v3 = MSstats::OpenMStoMSstatsFormat(input,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::OpenMStoMSstatsFormat(input,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
}
} else {
input = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$input_path)))
annotation = readRDS(paste0("./processed_data/", gsub("tsv|csv|xls|txt", "RDS", dataset$annotation_path)))
input = as.data.frame(input)
annotation = as.data.frame(annotation)
if (dataset$tool == "PD") {
if (dataset$type == "TMT") {
v3 = MSstatsTMT::PDtoMSstatsTMTFormat(input, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
v4 = MSstatsConvert::PDtoMSstatsTMTFormat(input, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
} else {
v3 = MSstats::PDtoMSstatsFormat(input, annotation,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::PDtoMSstatsFormat(input, annotation,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
}
}
if (dataset$tool == "OpenSWATH") {
v3 = MSstats::OpenSWATHtoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::OpenSWATHtoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
}
if (dataset$tool == "Progenesis") {
v3 = MSstats::ProgenesistoMSstatsFormat(input, annotation,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::ProgenesistoMSstatsFormat(input, annotation,
removeProtein_with1Peptide = remove_single_feature,
fewMeasurements = remove_few_lf)
}
if (dataset$tool == "Skyline") {
v3 = MSstats::SkylinetoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::SkylinetoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
}
if (dataset$tool == "SpectroMine") {
v3 = MSstatsTMT::SpectroMinetoMSstatsTMTFormat(input, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
v4 = MSstatsConvert::SpectroMinetoMSstatsTMTFormat(input, annotation,
rmPSM_withfewMea_withinRun = remove_few,
rmProtein_with1Feature = remove_single_feature)
}
if (dataset$tool == "Spectronaut") {
v3 = MSstats::SpectronauttoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
v4 = MSstatsConvert::SpectronauttoMSstatsFormat(input, annotation,
removeProtein_with1Feature = remove_single_feature,
fewMeasurements = remove_few_lf)
}
}
saveRDS(v3, file = paste0(output_path, dataset$folder_path, "_", "v3", ".RDS"))
saveRDS(v4, file = paste0(output_path, dataset$folder_path, "_", "v4", ".RDS"))
}, error = function(e) {
problems <<- c(problems, dataset$folder_path)
print(e)
})
}
}
# metadata = metadata[sapply(metadata, function(x) x$tool) == "Skyline"]
metadata = metadata[!(sapply(metadata, function(x) x$name) %in% c("Azimifa2014"))]
# metadata = metadata[sapply(metadata, function(x) x$name) == "Rakus2016"]
save_single_output(metadata, "./single_outputs/")
compare_default = get_comparison_df("./single_outputs/")
save_single_output(metadata, output_path = "./single_outputs_no_few/", remove_single_feature = FALSE, remove_few = FALSE)
compare_no_few = get_comparison_df("./single_outputs_no_few/")
save_single_output(metadata, output_path = "./single_outputs_no_singles/", remove_single_feature = TRUE, remove_few = TRUE)
compare_no_singles = get_comparison_df("./single_outputs_no_singles/")