-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathss_calculations.Rmd
173 lines (142 loc) · 5.85 KB
/
ss_calculations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
---
title: "sample size calculations, various RCTs and surveys"
author: "A.Amstutz"
date: "2023-10-18"
output:
html_document:
keep_md: yes
toc: yes
toc_float: yes
code_folding: hide
pdf_document:
toc: yes
---
# Load packages
```{r load packages, echo=TRUE, message=FALSE, warning=FALSE}
library(tidyverse)
library(readxl)
library(writexl)
library(here)
library(kableExtra)
library(ggplot2)
library(pwr)
```
# Sample size for a binary outcome / individual randomized trial
```{r, include=FALSE}
# Define the parameters
p_int <- 0.55 # Estimated proportion in control group
p_cont <- 0.50 # Estimated proportion in intervention group
alpha <- 0.05 # Significance level (usually 0.05 for a 95% confidence interval)
power <- 0.80 # Desired power (usually 0.80 for 80% power, or 0.9)
# calculate the sample size using pwr, two-sided (effect could go either way)
sample_size_1arm <- pwr.2p.test(h = ES.h(p_int, p_cont),
sig.level = alpha,
power = power)
sample_size <- sample_size_1arm$n * 2
# print
cat("Required Sample Size:", round(sample_size, 0))
#####
# Define the parameters for a "manual calculation"
alpha <- 0.05 # Significance level
power <- 0.80 # Desired power
p_int <- 0.55 # Estimated proportion in control group
p_cont <- 0.50 # Estimated proportion in intervention group
Z_alpha_half <- qnorm(1 - alpha / 2) # translate into Z-distribution -> equals 0.975 (95% CI)
Z_beta <- qnorm(power)
# calculate the sample size using the formula itself
sample_size_1arm <- ((Z_alpha_half + Z_beta)^2 * (p_int * (1 - p_int) + p_cont * (1 - p_cont))) / (p_int - p_cont)^2
sample_size <- sample_size_1arm * 2
# print
cat("Required sample size:", round(sample_size, 0))
```
# Sample size calculation for a continous outcome / individual randomized trial
```{r}
# Define the parameters
alpha <- 0.05 # Significance level (usually 0.05 for a 95% confidence interval)
power <- 0.80 # Desired power (usually 0.80 or 80% power)
mean_int <- 5 # Mean of the control group
mean_cont <- 0 # Mean of the intervention group
sd <- 10 # standard deviation of the outcome. We need to make a guess at the population standard deviation. If we have absolutely no idea, one rule of thumb is to take the difference between the maximum and minimum values and divide by 4. Let's say the maximum purchase is $10 and the minimum purchase is $1. Our estimated standard deviation is (10 - 1)/4 = 2.25
# Calculate the effect size
effect_size <- (mean_int - mean_cont) / sd
# Calculate the sample size for 1 arm
sample_size_1arm <- pwr.t.test(d = effect_size,
sig.level = alpha,
power = power)
sample_size <- sample_size_1arm$n * 2
# Print
cat("Required sample size:", round(sample_size, 0))
#####
# Define the parameters for a "manual calculation"
alpha <- 0.05 # Significance level
power <- 0.80 # Desired power
mean_int <- 5 # Mean of the control group
mean_cont <- 0 # Mean of the intervention group
sd <- 10 # Estimated standard deviation
# Calculate the effect size (delta)
delta <- abs(mean_int - mean_cont)
# Calculate the critical values for Z_alpha/2 and Z_beta
Z_alpha_half <- qnorm(1 - alpha / 2)
Z_beta <- qnorm(power)
# Calculate the sample size for 1 arm
sample_size_1arm <- (2 * (sd^2) / delta^2) * ((Z_alpha_half + Z_beta)^2)
sample_size <- sample_size_1arm * 2
# Print the sample size
cat("Required sample size:", round(sample_size, 0))
```
# Sample size calculation for a cluster randomized trial // continuous outcome
```{r}
# Define the parameters
alpha <- 0.05 # Significance level
power <- 0.80 # Desired power
mean_int <- 5 # Mean of the control group
mean_cont <- 0 # Mean of the intervention group
sd <- 0.015 # Estimated standard deviation of the outcome
cluster_size <- 2.4 # Average cluster size
icc <- 0.015 # Intracluster correlation coefficient
# Calculate the effect size
#delta <- abs(mean_int - mean_cont)
delta <- 0.006
# Calculate the critical values for Z_alpha/2 and Z_beta
Z_alpha_half <- qnorm(1 - alpha / 2)
Z_beta <- qnorm(power)
# Calculate the design effect (or Variance Inflation Factor (VIF))
design_effect <- 1 + (cluster_size - 1) * icc
# Calculate first the SS for an individual RCT and then inflate by the VIF
sample_size_1arm_ind <- (2 * (sd^2) / delta^2) * ((Z_alpha_half + Z_beta)^2)
sample_size_2arm_ind <- sample_size_1arm * 2
# Inflate for any attrition rate?
attrition <- 0.2
sample_size_2arm_ind_attr <- sample_size_2arm_ind + sample_size_2arm_ind*attrition
# Inflate for any non-uptake rate?
# Inflate for unequal cluster size?
# Inflate for clustering
sample_size_cluster_ind <- sample_size * design_effect
cat("Required sample size:", round(sample_size_cluster_ind, 0))
# Number of clusters
n_clusters <- sample_size_cluster_ind / cluster_size
cat("Required clusters:", round(n_clusters, 0))
```
# Sample size calculation for a cluster randomized trial // binary outcome
```{r}
# Define the parameters
alpha <- 0.05 # alpha level
power <- 0.90 # power
p_int <- 0.65 # Proportion of the binary outcome in the intervention group
p_cont <- 0.50 # Proportion of the binary outcome in the control group
cluster_size <- 20 # Average cluster size
icc <- 0.1 # Intracluster correlation coefficient
# Calculate the design effect
design_effect <- 1 + (cluster_size - 1) * icc
# Calculate the critical values for Z_alpha/2 and Z_beta
Z_alpha_half <- qnorm(1 - alpha / 2)
Z_beta <- qnorm(power)
# Calculate the total sample size (individual level) by inflating the SS for an individual RCT by the VIF
sample_size_1arm <- ((Z_alpha_half + Z_beta)^2 * (p_int * (1 - p_int) + p_cont * (1 - p_cont))) / (p_int - p_cont)^2
sample_size <- sample_size_1arm * 2
sample_size_cluster_ind <- sample_size * design_effect
cat("Required sample size:", round(sample_size_cluster_ind, 0))
# Number of clusters
n_clusters <- sample_size_cluster_ind / cluster_size
cat("Required clusters:", round(n_clusters, 0))
```