-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsc11_03_create_table_with_ok_slurm.R
137 lines (106 loc) · 5.88 KB
/
sc11_03_create_table_with_ok_slurm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Set working directory
# Changes the session's working directory. Every input and output path used
# further down is spelled out as an absolute path, so this call only affects
# relative paths.
setwd("/my/working/directory/data")
# Functions ---------------------------------------------------------------
# Attach a package, installing it (with its dependencies) first when it is
# not yet installed.
#
# Arguments:
#   p  Package name as a single character string.
#
# Returns:
#   The (invisible) value of library(); called for its side effect of
#   attaching `p` to the search path.
usePackage <- function(p) {
  # system.file() returns "" for a package that is not installed. It is much
  # cheaper than installed.packages(), which scans every library.
  if (!nzchar(system.file(package = p))) {
    repos <- getOption("repos")
    # A non-interactive session (e.g. a batch job) cannot prompt for a CRAN
    # mirror, so install.packages() fails while the placeholder "@CRAN@" is
    # still set. Fall back to the CRAN cloud mirror in that case.
    if (is.null(repos)) {
      repos <- c(CRAN = "https://cloud.r-project.org")
    } else if ("@CRAN@" %in% repos) {
      repos[repos == "@CRAN@"] <- "https://cloud.r-project.org"
    }
    install.packages(p, dependencies = TRUE, repos = repos)
  }
  library(p, character.only = TRUE)
}
# Libraries ---------------------------------------------------------------
usePackage("data.table")
usePackage("tidyverse")
usePackage("stringr")
# Directories -------------------------------------------------------------
# Root directory of each variable set.
# NOTE(review): `bg` sits under ".../data/" while `bo` and `gg` do not --
# confirm this difference is intended.
bo <- "/my/working/directory/partitional_clustering/bo/"
bg <- "/my/working/directory/data/partitional_clustering/bg/"
gg <- "/my/working/directory/partitional_clustering/gg/"

# Per-basin sub-directory holding the index tables.
basin102 <- "basin_102_812813/indices/"
basin88 <- "basin_88_649217/indices/"
basin59 <- "basin_59_1173421/indices/"
basin75 <- "basin_75_560810/indices/"
basin20 <- "basin_20_1320241/indices/"
basin47 <- "basin_47_481455/indices/"
basin06 <- "basin_00_000006/indices/"

# Seeds of the clustering runs, in the order the tables are processed.
seed_tags <- c("256", "543", "1234")

# Paths of the index tables (one per seed) for one basin and variable set.
idx_paths <- function(root, basin_dir, basin_tag, set_tag) {
  paste0(
    root, basin_dir,
    "idx_table_60k_", basin_tag, "_", set_tag,
    "_seed", seed_tags, "_nstart500.csv"
  )
}

# Paths for a basin that has both the "bo" and the "bg" variable set
# ("bo" tables first, then "bg").
bo_bg_paths <- function(basin_dir, basin_tag) {
  c(
    idx_paths(bo, basin_dir, basin_tag, "bo"),
    idx_paths(bg, basin_dir, basin_tag, "bg")
  )
}

files <- c(
  bo_bg_paths(basin102, "102_812813"),
  bo_bg_paths(basin88, "88_649217"),
  bo_bg_paths(basin59, "59_1173421"),
  bo_bg_paths(basin75, "75_560810"),
  bo_bg_paths(basin20, "20_1320241"),
  bo_bg_paths(basin47, "47_481455"),
  # Basin 00_000006 only has the "gg" variable set.
  idx_paths(gg, basin06, "00_000006", "gg")
)
# Input data --------------------------------------------------------------
# Pull the run metadata out of an index-table file name of the form
#   idx_table_60k_<CompUnit>_<BasinID>_<Set>_seed<SEED>_nstart<NSTART>.csv
# Returns a list with elements cunit, basin_id, vset (character) and
# rnum, nstr (numeric).
parse_idx_file_name <- function(file_path) {
  # basename() works for any directory depth; a fixed position in the
  # "/"-split path breaks as soon as the directory layout changes.
  name_parts <- strsplit(basename(file_path), split = "_", fixed = TRUE)[[1]]
  if (length(name_parts) < 8) {
    stop("Unexpected index-table file name: ", file_path, call. = FALSE)
  }
  list(
    cunit = name_parts[[4]],
    basin_id = name_parts[[5]],
    vset = name_parts[[6]],
    rnum = as.numeric(str_extract(name_parts[[7]], "[0-9]+")),
    nstr = as.numeric(str_extract(name_parts[[8]], "[0-9]+"))
  )
}

# Read one index table and keep the row(s) where the "davies_bouldin" index
# takes its smallest value, labelled with the metadata from the file name.
# The table is expected to have the columns `indices`, `values` and `k`.
best_ncl_for_file <- function(file_path) {
  run_meta <- parse_idx_file_name(file_path)
  read_csv(file_path) %>%
    filter(indices == "davies_bouldin") %>%
    filter(values == min(values)) %>%
    mutate(
      CompUnit = run_meta$cunit,
      BasinID = run_meta$basin_id,
      Set = run_meta$vset,
      SEED = run_meta$rnum,
      NSTART = run_meta$nstr
    ) %>%
    rename(NCL = k) %>%
    select(CompUnit, BasinID, Set, SEED, NSTART, NCL)
}

# Process every file (whatever the length of `files`) and stack the results
# once, instead of growing the table inside a loop.
tbl <- bind_rows(lapply(files, best_ncl_for_file))
tbl <- as.data.table(tbl)

# `path` already ends with "/", so the file name is appended without another
# separator.
path <- "/my/working/directory/data/partitional_clustering/"
fwrite(tbl, paste0(path, "bst_ncl.txt"), sep = " ")
# Exit R ------------------------------------------------------------------
# End the R session without saving the workspace image.
quit(save = "no")