momenulhaque
diff --git a/Diff for: ‎DESCRIPTION
+21-8 b/Diff for: ‎DESCRIPTION
+21-8
diff --git a/Diff for: ‎NAMESPACE
+14 b/Diff for: ‎NAMESPACE
+14
diff --git a/Diff for: ‎R/aipw_multiple_p.R
+56-19 b/Diff for: ‎R/aipw_multiple_p.R
+56-19
diff --git a/Diff for: ‎R/aipw_single_p.R
+35-12 b/Diff for: ‎R/aipw_single_p.R
+35-12
diff --git a/Diff for: ‎R/data.R
+2-2 b/Diff for: ‎R/data.R
+2-2
@@ -1,18 +1,31 @@
 Package: Crossfit
-Title: Estimate ATE using cross-fit procedure for AIPW or TMLE estimator
+Title: Estimate ATE from AIPW or TMLE estimator in causal inference
 Version: 0.1.0
-Authors@R: 
-    person("Momenul Haque", "Mondol", , "mmondol@isrt.ac.bd", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: Estimate ATE using cross-fit procedure for AIPW or TMLE estimator
+Authors@R:
+    c(person(given = "Momenul Haque",
+           family = "Mondol",
+           role = c("aut", "cre"),
+           email = "mhmondol@student.ubc.ca",
+           comment = c(ORCID = "0000-0002-2624-2118")),
+    person(given = "Muhammad Ehsanul",
+           family = "Karim",
+           role = "ctb",
+           comment = c(ORCID = "0000-0002-0346-2871")))
+Description: Applies sample splitting (cross-fitting) to the AIPW and TMLE
+  estimators to draw causal inference. Cross-fitting is supported with both
+  estimators for binary and continuous outcomes.
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Imports: 
     dplyr,
-    SuperLearner,
-    tidyr
+    tidyr,
+    furrr,
+    purrr,
+    tibble,
+    tmle, 
+    SuperLearner
 Depends: 
     R (>= 2.10)
 LazyData: true
@@ -4,3 +4,17 @@ export(aipw_multiple_p)
 export(aipw_single_p)
 export(tmle_multiple_p)
 export(tmle_single_p)
+import(dplyr)
+import(furrr)
+import(purrr)
+import(tibble)
+import(tidyr)
+import(tmle)
+importFrom(stats,binomial)
+importFrom(stats,coef)
+importFrom(stats,glm)
+importFrom(stats,median)
+importFrom(stats,plogis)
+importFrom(stats,predict)
+importFrom(stats,qlogis)
+importFrom(stats,var)
@@ -1,25 +1,43 @@
-#' apply aipw_single_p for num_cf times
-#'
-#' @param data similar as aipw_single_p() function
-#' @param exposure similar as aipw_single_p() function
-#' @param outcome similar as aipw_single_p() function
-#' @param covarsT similar as aipw_single_p() function
-#' @param covarsO similar as aipw_single_p() function
-#' @param learners similar as aipw_single_p() function
-#' @param control similar as aipw_single_p() function
-#' @param num_cf number of partitions
-#' @param n_split similar as aipw_single_p() function
-#' @param rand_split logical value; if be TRUE, discordant splits for exposure and outcome model are chosen at random ; otherwise chosen systematically.
-#' @param seed numeric value to reproduce the splits
-
+#' Estimate ATE from the AIPW estimator using the cross-fit algorithm (multiple repetitions)
+#'
+#' @param data a data frame or tibble
+#' @param exposure name of exposure variable
+#' @param outcome name of outcome variable
+#' @param covarsT a vector of names of covariates for the treatment model
+#' @param covarsO a vector of names of covariates for the outcome model
+#' @param family.y the family for the outcome model. It can be `binomial()` (default) or `"gaussian"`
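+#' @param learners a character vector of learner names, passed as the `SL.library` argument of \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}
+#' @param control cross-validation control settings, passed as the `cvControl` argument of \code{\link[SuperLearner:SuperLearner]{SuperLearner::SuperLearner()}}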
+#' @param num_cf number of repetitions performed. The default is 5.
+#' @param n_split number of splits used, default `n_split = 3`
+#' @param rand_split logical value; if `FALSE` (default), discordant splits for exposure and outcome model are chosen systematically; otherwise they are chosen randomly.
+#' @param seed numeric value to reproduce the splits distribution
+#'
 #' @return a tibble of the estimates
+#'
+#' @import dplyr tibble tidyr purrr furrr tmle
+#'
 #' @export
 #'
 #' @examples
+#'
+#' # See the README file for details
+#'
 #' sum(1:4)
 #'
 #'
-aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners, control, num_cf, n_split, rand_split = TRUE, seed = 145){
+aipw_multiple_p <- function(data,
+                            exposure,
+                            outcome,
+                            covarsT,
+                            covarsO,
+                            family.y = binomial(),
+                            learners = c("SL.glm", "SL.glmnet", "SL.xgboost"),
+                            control,
+                            num_cf = 5,
+                            n_split = 3,
+                            rand_split = FALSE,
+                            seed = 145){
 
   #Initialize results
   runs <- tibble(r1=double(), r0=double(), rd=double(), v1=double(), v0=double(), vd=double())
@@ -29,7 +47,17 @@ aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners,
   cf_seed = sample(num_cf)
   for(cf in 1:num_cf){
     seed = cf_seed[cf]
-    runs <- bind_rows(runs, aipw_single_p(data, exposure, outcome, covarsT, covarsO, learners, control, n_split, rand_split, seed))
+    runs <- bind_rows(runs, aipw_single_p(data,
+                                          exposure,
+                                          outcome,
+                                          covarsT,
+                                          covarsO,
+                                          family.y,
+                                          learners,
+                                          control,
+                                          n_split,
+                                          rand_split,
+                                          seed))
 
   }
   #Medians of splits
@@ -43,12 +71,21 @@ aipw_multiple_p <-function(data, exposure, outcome, covarsT, covarsO, learners,
            mvd = vd + (rd-medians[3])^2)
 
   results <- apply(runs, 2, median)
-  return(results)
-
-}
 
+  fit <- list()
 
+  fit$ATE <- tibble(Estimate = results["rd"], std.error = sqrt(results["mvd"]),
+                    lower_ci = results["rd"] - 1.959964*sqrt(results["mvd"]),
+                    upper_ci = results["rd"] + 1.959964*sqrt(results["mvd"]))
 
+  fit$Effct_Treat <- tibble(Estimate = results["r1"], std.error = sqrt(results["mv1"]),
+                            lower_ci = results["r1"] - 1.959964*sqrt(results["mv1"]),
+                            upper_ci = results["r1"] + 1.959964*sqrt(results["mv1"]))
 
+  fit$Effct_Control <- tibble(Estimate = results["r0"], std.error = sqrt(results["mv0"]),
+                              lower_ci = results["r0"] - 1.959964*sqrt(results["mv0"]),
+                              upper_ci = results["r0"] + 1.959964*sqrt(results["mv0"]))
 
+  fit
 
+}
@@ -1,24 +1,42 @@
-#' Estimate ATE using cross-fit procedure for AIPW estimator
+#' Estimate ATE using AIPW estimator with cross-fit algorithm (single repetition)
 #'
 #' @param data a data frame of tibble
 #' @param exposure name of exposure variable
 #' @param outcome name of outcome variable
 #' @param covarsT a vector of names of covaraites for treatment model
 #' @param covarsO a vector of names of covaraites for outcome model
+#' @param family.y the family for the outcome model, e.g. `binomial()` or `"gaussian"`
 #' @param learners similar as \code{\link[Superlearner:SL.library()]{Superlearner::SL.library()}}
 #' @param control similar as  \code{\link[Superlearner:cvControl()]{Superlearner::cvControl()}}
-#' @param n_split number of splits
-#' @param rand_split logical value; if be TRUE, discordant splits for exposure and outcome model are chosen at random ; otherwise chosen systematically.
-#' @param seed numeric value to reproduce the splits
-#' @return a tibble of estimates
+#' @param n_split number of splits used
+#' @param rand_split logical value; if `FALSE`, discordant splits for exposure and outcome model are chosen systematically; otherwise they are chosen randomly.
+#' @param seed numeric value to reproduce the splits distribution
+#' @return a tibble of estimates.
+#'
+#' @import dplyr tibble tidyr purrr furrr tmle
+#'
+#' @importFrom stats binomial coef glm median plogis predict qlogis var
 #'
 #' @export
 #'
 #' @examples
 #'
+#' # See the README file for details
+#'
+#'
 #' sum(1:5)
 #'
-aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, control, n_split, rand_split = TRUE, seed = 145){
+aipw_single_p <- function(data,
+                          exposure,
+                          outcome,
+                          covarsT,
+                          covarsO,
+                          family.y,
+                          learners,
+                          control,
+                          n_split,
+                          rand_split,
+                          seed){
 
   # Split sample
   set.seed(seed)
@@ -35,7 +53,11 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
   # P-score model
 
   pi_fitter <- function(df){
-    SuperLearner(Y=as.matrix(df[, exposure]), X=df[, covarsT], family=binomial(), SL.library=learners, cvControl=control)
+    SuperLearner(Y=as.matrix(df[, exposure]),
+                 X=df[, covarsT],
+                 family=binomial(),
+                 SL.library=learners,
+                 cvControl=control)
   }
 
   dat_nested_p <- dat_nested_p %>%
@@ -55,15 +77,16 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
 
   #Outcome model
   mu_fitter <- function(df){
-    SuperLearner::SuperLearner(Y=as.matrix(df[, outcome]), X=df[, c(exposure, covarsO)], family=binomial(), SL.library=learners, cvControl=control)
+    SuperLearner::SuperLearner(Y=as.matrix(df[, outcome]),
+                               X=df[, c(exposure, covarsO)],
+                               family=family.y,
+                               SL.library=learners,
+                               cvControl=control)
   }
 
   dat_nested_p <- dat_nested_p %>%
     mutate(mu_fit=map(data, mu_fitter))
 
-
-
-
   # Calc mu using each split
   dat1_p = dat0_p = data_p
 
@@ -95,7 +118,7 @@ aipw_single_p <- function(data, exposure, outcome, covarsT, covarsO, learners, c
     select(s, paste0("mu0_", 1:n_split))
 
   Y_p <-  data_p %>%
-    select(s, outcome)
+    select(s, Y)
 
   X_p <-  data_p %>%
     select(s, exposure)
 
@@ -13,7 +13,7 @@
 #'   \item{risk_score}{Risk score of ASCVD for each patient}
 #'   \item{risk_score_cat}{Risk category of ASCVD for each patient. The patient belongs to 3 category has the greatest risk}
 #'
-#'    ...
 #' }
-#' @source
 #'
+"data"
+