-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathanalysis.R
144 lines (115 loc) · 4.7 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
###
# Instructions
# Rscript analysis.R men_sampled_shoe.csv men_fit.RData
# Rscript analysis.R women_sampled_shoe.csv women_fit.RData
# or
# Rscript analysis.R men_sampled_shoe.csv women_sampled_shoe.csv combined_fit.RData
###
args <- commandArgs(trailingOnly = TRUE)
# args <- c("women_sampled_shoe.csv","women_fit.RData")
# args <- c("men_sampled_shoe.csv", "women_sampled_shoe.csv", "combined_fit.RData")
input_perf_csv <- args[1]
output_rdata <- args[length(args)]
# read in the data
if(length(args) == 2){
perf_data <- read.csv(args[1], as.is = TRUE )
} else {
male_perf = read.csv(args[1], as.is = TRUE)
male_perf$sex = 0
female_perf = read.csv(args[2], as.is = TRUE)
female_perf$sex = 1
perf_data = rbind(male_perf, female_perf)
}
# convert to a day count
perf_data$date <- as.Date(perf_data$date)
perf_data$day_count <- as.numeric( perf_data$date - min(perf_data$date) )
# find the non-missing values
not_missing <- !is.na(perf_data$vaporfly)
n <- sum(not_missing)
# define the response
y <- perf_data$time_minutes[not_missing]
# define the vaporfly variable
int <- rep(1,n)
x1 <- as.numeric( perf_data$vaporfly[not_missing] )
X <- cbind(int,x1)
# runner factor and block matrix
f1 <- as.factor(perf_data$match_name[not_missing])
lev1 <- levels(f1)
lev1 <- levels(f1)
Z1 <- matrix(0, n, length(lev1) )
for(j in 1:ncol(Z1)){ Z1[,j] <- as.numeric( f1 == lev1[j] ) }
ZZ1 <- Z1 %*% t(Z1)
# course factor and block matrix
f2 <- as.factor(perf_data$marathon[not_missing])
lev2 <- levels(f2)
Z2 <- matrix(0, n, length(lev2) )
for(j in 1:ncol(Z2)){ Z2[,j] <- as.numeric( f2 == lev2[j] ) }
ZZ2 <- Z2 %*% t(Z2)
# individual race factor
f3 <- as.factor( paste(perf_data$marathon,perf_data$year)[not_missing] )
lev3 <- levels(f3)
Z3 <- matrix(0, n, length(lev3))
for(j in 1:ncol(Z3)){ Z3[,j] <- as.numeric( f3 == lev3[j] ) }
ZZ3 <- Z3 %*% t(Z3)
# days
day_count <- perf_data$day_count[not_missing]
# fit the models
if(length(args) == 2){ # only 2 arguments if not combined sex
fit1 <- lme4::lmer( y ~ x1 + (1|f1) + (1|f2) + (1|f3), REML = TRUE )
fit2 <- lme4::lmer( log(y) ~ x1 + (1|f1) + (1|f2) + (1|f3), REML = TRUE )
vc <- lme4::VarCorr(fit1)
covparms1 <- c(vc$f1,vc$f2,vc$f3,attr(vc,"sc")^2)
vc <- lme4::VarCorr(fit2)
covparms2 <- c(vc$f1,vc$f2,vc$f3,attr(vc,"sc")^2)
cat("##########################################\n")
cat(" Fit to untransformed data\n")
cat("##########################################\n")
print(summary(fit1))
cat("\nConfidence interval for vaporfly effect\n")
print( round( fit1@beta[2] + qnorm(0.95)*sqrt(vcov(fit1)[2,2])*c(-1,1), 3 ) )
cat("\n##########################################\n")
cat(" Fit to log transformed data\n")
cat("##########################################\n")
print(summary(fit2))
cat("\nConfidence interval for vaporfly effect\n")
print( round( fit2@beta[2] + qnorm(0.95)*sqrt(vcov(fit2)[2,2])*c(-1,1), 3 ) )
# multiplicative effects
cat("\nmultiplicative effect\n")
print( exp(fit2@beta[2]) )
print( 1-exp(fit2@beta[2]) )
# save the results
save(fit1, fit2, y, x1, f1, f2, f3, day_count, X, ZZ1, ZZ2, ZZ3,
covparms1, covparms2, file = output_rdata)
} else { # fit model with both sexes together
f4 <- as.factor(perf_data$sex[not_missing])
fit1 <- lme4::lmer( y ~ x1*f4 + (1|f1) + (1|f2) + (1|f3), REML = TRUE)
fit2 <- lme4::lmer( log(y) ~ x1*f4 + (1|f1) + (1|f2) + (1|f3), REML = TRUE )
vc <- lme4::VarCorr(fit1)
covparms1 <- c(vc$f1,vc$f2,vc$f3,attr(vc,"sc")^2)
vc <- lme4::VarCorr(fit2)
covparms2 <- c(vc$f1,vc$f2,vc$f3,attr(vc,"sc")^2)
cat("##########################################\n")
cat(" Fit to untransformed data\n")
cat("##########################################\n")
print(summary(fit1))
cat("\nConfidence interval for vaporfly effects\n")
print( round( fit1@beta[2] + qnorm(0.95)*sqrt(vcov(fit1)[2,2])*c(-1,1), 3 ) )
print( round( fit1@beta[2] + fit1@beta[4] +
qnorm(0.95)*sqrt(sum( vcov(fit1)[c(2,4),c(2,4)] ) )*c(-1,1), 3 ) )
cat("\n##########################################\n")
cat(" Fit to log transformed data\n")
cat("##########################################\n")
print(summary(fit2))
cat("\nConfidence interval for vaporfly effect\n")
print( round( fit2@beta[2] + qnorm(0.95)*sqrt(vcov(fit2)[2,2])*c(-1,1), 3 ) )
print( round( fit2@beta[2] + fit2@beta[4] +
qnorm(0.95)*sqrt(sum( vcov(fit2)[c(2,4),c(2,4)] ) )*c(-1,1), 3 ) )
# multiplicative effects
cat("\nmultiplicative effect\n")
print( exp(fit2@beta[2]) )
print( exp(fit2@beta[2]+fit2@beta[4]) )
print( 1-exp(fit2@beta[2]) )
print( 1-exp(fit2@beta[2]+fit2@beta[4]) )
save(fit1, fit2, y, x1, f1, f2, f3, f4, day_count, X, ZZ1, ZZ2, ZZ3,
covparms1, covparms2, file = output_rdata)
}