Skip to content

Commit 8296151

Browse files
authored
Add files via upload
1 parent c7ae4ca commit 8296151

12 files changed

+520
-282
lines changed

Diff for: DESCRIPTION

+21-8
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,31 @@
11
Package: Crossfit
2-
Title: Estimate ATE using cross-fit procedure for AIPW or TMLE estimator
2+
Title: Estimate ATE from AIPW or TMLE estimator in causal inference
33
Version: 0.1.0
4-
Authors@R:
5-
person("Momenul Haque", "Mondol", , "mmondol@isrt.ac.bd", role = c("aut", "cre"),
6-
comment = c(ORCID = "YOUR-ORCID-ID"))
7-
Description: Estimate ATE using cross-fit procedure for AIPW or TMLE estimator
4+
Authors@R:
5+
c(person(given = "Momenul Haque",
6+
family = "Mondol",
7+
role = c("aut", "cre"),
8+
email = "mhmondol@student.ubc.ca",
9+
comment = c(ORCID = "0000-0002-2624-2118")),
10+
person(given = "Muhammad Ehsanul",
11+
family = "Karim",
12+
role = "ctb",
13+
comment = c(ORCID = "0000-0002-0346-2871")))
14+
Description: Sample splitting (cross-fit) is applied to AIPW and TMLE to draw
15+
causal inference. Crossfit can be applied to both AIPW and TMLE, for binary
16+
and continuous outcomes.
817
License: MIT + file LICENSE
918
Encoding: UTF-8
1019
Roxygen: list(markdown = TRUE)
11-
RoxygenNote: 7.2.1
20+
RoxygenNote: 7.2.3
1221
Imports:
1322
dplyr,
14-
SuperLearner,
15-
tidyr
23+
tidyr,
24+
furrr,
25+
purrr,
26+
tibble,
27+
tmle,
28+
SuperLearner
1629
Depends:
1730
R (>= 2.10)
1831
LazyData: true

Diff for: NAMESPACE

+14
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,17 @@ export(aipw_multiple_p)
44
export(aipw_single_p)
55
export(tmle_multiple_p)
66
export(tmle_single_p)
7+
import(dplyr)
8+
import(furrr)
9+
import(purrr)
10+
import(tibble)
11+
import(tidyr)
12+
import(tmle)
13+
importFrom(stats,binomial)
14+
importFrom(stats,coef)
15+
importFrom(stats,glm)
16+
importFrom(stats,median)
17+
importFrom(stats,plogis)
18+
importFrom(stats,predict)
19+
importFrom(stats,qlogis)
20+
importFrom(stats,var)

Diff for: R/aipw_multiple_p.R

+56-19
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,43 @@
1-
#' apply aipw_single_p for num_cf times
2-
#'
3-
#' @param data similar as aipw_single_p() function
4-
#' @param exposure similar as aipw_single_p() function
5-
#' @param outcome similar as aipw_single_p() function
6-
#' @param covarsT similar as aipw_single_p() function
7-
#' @param covarsO similar as aipw_single_p() function
8-
#' @param learners similar as aipw_single_p() function
9-
#' @param control similar as aipw_single_p() function
10-
#' @param num_cf number of partitions
11-
#' @param n_split similar as aipw_single_p() function
12-
#' @param rand_split logical value; if be TRUE, discordant splits for exposure and outcome model are chosen at random ; otherwise chosen systematically.
13-
#' @param seed numeric value to reproduce the splits
14-
1+
#' Estimate ATE from the AIPW estimator using the cross-fit algorithm (multiple repetitions)
2+
#'
3+
#' @param data a data frame or tibble
4+
#' @param exposure name of exposure variable
5+
#' @param outcome name of outcome variable
6+
#' @param covarsT a vector of names of covariates for treatment model
7+
#' @param covarsO a vector of names of covariates for outcome model
8+
#' @param family.y the family for the outcome model. It can be `binomial()` (default) or `"gaussian"`
9+
#' @param learners prediction algorithms passed as `SL.library` to \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}
10+
#' @param control cross-validation settings passed as `cvControl` to \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}
11+
#' @param num_cf number of repetitions performed. The default is 5.
12+
#' @param n_split number of splits used, default `n_split = 3`
13+
#' @param rand_split logical value; if `FALSE` (default), discordant splits for exposure and outcome model are chosen systematically; otherwise they are chosen randomly.
14+
#' @param seed numeric value to reproduce the splits distribution
15+
#' @return a tibble of the estimates
1516
#' @return a tibble of the estimates
17+
#'
18+
#' @import dplyr tibble tidyr purrr furrr tmle
19+
#'
1620
#' @export
1721
#'
1822
#' @examples
23+
#'
24+
#' # See the README file for details
25+
#'
1926
#' sum(1:4)
2027
#'
2128
#'
22-
aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners, control, num_cf, n_split, rand_split = TRUE, seed = 145){
29+
aipw_multiple_p <- function(data,
30+
exposure,
31+
outcome,
32+
covarsT,
33+
covarsO,
34+
family.y = binomial(),
35+
learners = c("SL.glm", "SL.glmnet", "SL.xgboost"),
36+
control,
37+
num_cf = 5,
38+
n_split = 3,
39+
rand_split = FALSE,
40+
seed = 145){
2341

2442
#Initialize results
2543
runs <- tibble(r1=double(), r0=double(), rd=double(), v1=double(), v0=double(), vd=double())
@@ -29,7 +47,17 @@ aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners,
2947
cf_seed = sample(num_cf)
3048
for(cf in 1:num_cf){
3149
seed = cf_seed[cf]
32-
runs <- bind_rows(runs, aipw_single_p(data, exposure, outcome, covarsT, covarsO, learners, control, n_split, rand_split, seed))
50+
runs <- bind_rows(runs, aipw_single_p(data,
51+
exposure,
52+
outcome,
53+
covarsT,
54+
covarsO,
55+
family.y,
56+
learners,
57+
control,
58+
n_split,
59+
rand_split,
60+
seed))
3361

3462
}
3563
#Medians of splits
@@ -43,12 +71,21 @@ aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners,
4371
mvd = vd + (rd-medians[3])^2)
4472

4573
results <- apply(runs, 2, median)
46-
return(results)
47-
48-
}
4974

75+
fit <- list()
5076

77+
fit$ATE <- tibble(Estimate = results["rd"], std.error = sqrt(results["mvd"]),
78+
lower_ci = results["rd"] - 1.959964*sqrt(results["mvd"]),
79+
upper_ci = results["rd"] + 1.959964*sqrt(results["mvd"]))
5180

81+
fit$Effct_Treat <- tibble(Estimate = results["r1"], std.error = sqrt(results["mv1"]),
82+
lower_ci = results["r1"] - 1.959964*sqrt(results["mv1"]),
83+
upper_ci = results["r1"] + 1.959964*sqrt(results["mv1"]))
5284

85+
fit$Effct_Control <- tibble(Estimate = results["r0"], std.error = sqrt(results["mv0"]),
86+
lower_ci = results["r0"] - 1.959964*sqrt(results["mv0"]),
87+
upper_ci = results["r0"] + 1.959964*sqrt(results["mv0"]))
5388

89+
fit
5490

91+
}

Diff for: R/aipw_single_p.R

+35-12
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,42 @@
1-
#' Estimate ATE using cross-fit procedure for AIPW estimator
1+
#' Estimate ATE using AIPW estimator with cross-fit algorithm (single repetition)
22
#'
33
#' @param data a data frame or tibble
44
#' @param exposure name of exposure variable
55
#' @param outcome name of outcome variable
66
#' @param covarsT a vector of names of covariates for treatment model
77
#' @param covarsO a vector of names of covariates for outcome model
8+
#' @param family.y the family for the outcome model. It can be `binomial()` (default) or `"gaussian"`
89
#' @param learners prediction algorithms passed as `SL.library` to \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}
910
#' @param control cross-validation settings passed as `cvControl` to \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}
10-
#' @param n_split number of splits
11-
#' @param rand_split logical value; if be TRUE, discordant splits for exposure and outcome model are chosen at random ; otherwise chosen systematically.
12-
#' @param seed numeric value to reproduce the splits
13-
#' @return a tibble of estimates
11+
#' @param n_split number of splits used, default `n_split = 3`
12+
#' @param rand_split logical value; if `FALSE` (default), discordant splits for exposure and outcome model are chosen systematically; otherwise they are chosen randomly.
13+
#' @param seed numeric value to reproduce the splits distribution
14+
#' @return a tibble of estimates.
15+
#'
16+
#' @import dplyr tibble tidyr purrr furrr tmle
17+
#'
18+
#' @importFrom stats binomial coef glm median plogis predict qlogis var
1419
#'
1520
#' @export
1621
#'
1722
#' @examples
1823
#'
24+
#' # See the README file for details
25+
#'
26+
#'
1927
#' sum(1:5)
2028
#'
21-
aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, control, n_split, rand_split = TRUE, seed = 145){
29+
aipw_single_p <- function(data,
30+
exposure,
31+
outcome,
32+
covarsT,
33+
covarsO,
34+
family.y,
35+
learners,
36+
control,
37+
n_split,
38+
rand_split,
39+
seed){
2240

2341
# Split sample
2442
set.seed(seed)
@@ -35,7 +53,11 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
3553
# P-score model
3654

3755
pi_fitter <- function(df){
38-
SuperLearner(Y=as.matrix(df[, exposure]), X=df[, covarsT], family=binomial(), SL.library=learners, cvControl=control)
56+
SuperLearner(Y=as.matrix(df[, exposure]),
57+
X=df[, covarsT],
58+
family=binomial(),
59+
SL.library=learners,
60+
cvControl=control)
3961
}
4062

4163
dat_nested_p <- dat_nested_p %>%
@@ -55,15 +77,16 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
5577

5678
#Outcome model
5779
mu_fitter <- function(df){
58-
SuperLearner::SuperLearner(Y=as.matrix(df[, outcome]), X=df[, c(exposure, covarsO)], family=binomial(), SL.library=learners, cvControl=control)
80+
SuperLearner::SuperLearner(Y=as.matrix(df[, outcome]),
81+
X=df[, c(exposure, covarsO)],
82+
family=family.y,
83+
SL.library=learners,
84+
cvControl=control)
5985
}
6086

6187
dat_nested_p <- dat_nested_p %>%
6288
mutate(mu_fit=map(data, mu_fitter))
6389

64-
65-
66-
6790
# Calc mu using each split
6891
dat1_p = dat0_p = data_p
6992

@@ -95,7 +118,7 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
95118
select(s, paste0("mu0_", 1:n_split))
96119

97120
Y_p <- data_p %>%
98-
select(s, outcome)
121+
select(s, Y)
99122

100123
X_p <- data_p %>%
101124
select(s, exposure)

Diff for: R/data.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#' \item{risk_score}{Risk score of ASCVD for each patient}
1414
#' \item{risk_score_cat}{Risk category of ASCVD for each patient. Patients in category 3 have the greatest risk}
1515
#'
16-
#' ...
1716
#' }
18-
#' @source
1917
#'
18+
"data"
19+

0 commit comments

Comments
 (0)