Updated vignettes

andrewherren · andrewherren · commit 1978ffdb78d2 · 2024-06-23T15:40:22.000-05:00
diff --git a/vignettes/CausalInference.Rmd b/vignettes/CausalInference.Rmd
@@ -803,15 +803,20 @@ We draw from a modified "demo 1" DGP
 
 ```{r}
 mu <- function(x) {1+g(x)+x[,1]*x[,3]-x[,2]+3*x[,3]}
-tau <- function(x) {1+0.5*abs(x[,1])-0.25*sin(2*x[,1])}
+tau <- function(x) {1+0.5*x[,1]}
 n <- 500
 snr <- 2
 x1 <- rnorm(n)
 x2 <- rnorm(n)
 x3 <- rnorm(n)
 x4 <- as.numeric(rbinom(n,1,0.5))
 x5 <- as.numeric(sample(1:3,n,replace=TRUE))
-X <- cbind(x1,x2,x3,x4,x5)
+x6 <- rnorm(n)
+x7 <- rnorm(n)
+x8 <- rnorm(n)
+x9 <- rnorm(n)
+x10 <- rnorm(n)
+X <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10)
 p <- ncol(X)
 mu_x <- mu(X)
 tau_x <- tau(X)
@@ -852,7 +857,7 @@ Here we simulate from the model with the original MCMC sampler, using all of the
 ```{r}
 num_gfr <- 0
 num_burnin <- 1000
-num_mcmc <- 1000
+num_mcmc <- 100
 num_samples <- num_gfr + num_burnin + num_mcmc
 bcf_model_mcmc <- bcf(
     X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 
@@ -871,6 +876,12 @@ abline(0,1,col="red",lty=3,lwd=3)
 plot(rowMeans(bcf_model_mcmc$tau_hat_test), tau_test, 
      xlab = "predicted", ylab = "actual", main = "Treatment effect")
 abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_mcmc$y_hat_test), y_test, 
+     xlab = "predicted", ylab = "actual", main = "Outcome")
+abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_mcmc$y_hat_test-bcf_model_mcmc$mu_hat_test), tau_test*Z_test, 
+     xlab = "predicted", ylab = "actual", main = "Treatment effect term")
+abline(0,1,col="red",lty=3,lwd=3)
 sigma_observed <- var(y-E_XZ)
 plot_bounds <- c(min(c(bcf_model_mcmc$sigma2_samples, sigma_observed)), 
                  max(c(bcf_model_mcmc$sigma2_samples, sigma_observed)))
@@ -894,8 +905,10 @@ mean(cover)
 And test set RMSE
 
 ```{r}
-test_mean <- rowMeans(bcf_model_mcmc$tau_hat_test)
-sqrt(mean((test_mean - tau_test)^2))
+test_tau_mean <- rowMeans(bcf_model_mcmc$tau_hat_test)
+sqrt(mean((test_tau_mean - tau_test)^2))
+test_outcome_mean <- rowMeans(bcf_model_mcmc$y_hat_test)
+sqrt(mean((test_outcome_mean - y_test)^2))
 ```
 
 #### MCMC, covariate subset in $\tau(X)$
@@ -905,7 +918,7 @@ Here we simulate from the model with the original MCMC sampler, using only covar
 ```{r}
 num_gfr <- 0
 num_burnin <- 1000
-num_mcmc <- 1000
+num_mcmc <- 100
 num_samples <- num_gfr + num_burnin + num_mcmc
 bcf_model_mcmc <- bcf(
     X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 
@@ -925,6 +938,12 @@ abline(0,1,col="red",lty=3,lwd=3)
 plot(rowMeans(bcf_model_mcmc$tau_hat_test), tau_test, 
      xlab = "predicted", ylab = "actual", main = "Treatment effect")
 abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_mcmc$y_hat_test), y_test, 
+     xlab = "predicted", ylab = "actual", main = "Outcome")
+abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_mcmc$y_hat_test-bcf_model_mcmc$mu_hat_test), tau_test*Z_test, 
+     xlab = "predicted", ylab = "actual", main = "Treatment effect term")
+abline(0,1,col="red",lty=3,lwd=3)
 sigma_observed <- var(y-E_XZ)
 plot_bounds <- c(min(c(bcf_model_mcmc$sigma2_samples, sigma_observed)), 
                  max(c(bcf_model_mcmc$sigma2_samples, sigma_observed)))
@@ -950,6 +969,8 @@ And test set RMSE
 ```{r}
 test_mean <- rowMeans(bcf_model_mcmc$tau_hat_test)
 sqrt(mean((test_mean - tau_test)^2))
+test_outcome_mean <- rowMeans(bcf_model_mcmc$y_hat_test)
+sqrt(mean((test_outcome_mean - y_test)^2))
 ```
 
 #### Warmstart, full covariate set in $\tau(X)$
@@ -959,7 +980,7 @@ Here we simulate from the model with the warm-start sampler, using all of the co
 ```{r}
 num_gfr <- 10
 num_burnin <- 0
-num_mcmc <- 1000
+num_mcmc <- 100
 num_samples <- num_gfr + num_burnin + num_mcmc
 bcf_model_warmstart <- bcf(
     X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 
@@ -978,6 +999,12 @@ abline(0,1,col="red",lty=3,lwd=3)
 plot(rowMeans(bcf_model_warmstart$tau_hat_test), tau_test, 
      xlab = "predicted", ylab = "actual", main = "Treatment effect")
 abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_warmstart$y_hat_test), y_test, 
+     xlab = "predicted", ylab = "actual", main = "Outcome")
+abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_warmstart$y_hat_test - bcf_model_warmstart$mu_hat_test), tau_test*Z_test, 
+     xlab = "predicted", ylab = "actual", main = "Treatment effect term")
+abline(0,1,col="red",lty=3,lwd=3)
 sigma_observed <- var(y-E_XZ)
 plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples, sigma_observed)), 
                  max(c(bcf_model_warmstart$sigma2_samples, sigma_observed)))
@@ -1001,8 +1028,10 @@ mean(cover)
 And test set RMSE
 
 ```{r}
-test_mean <- apply(bcf_model_warmstart$tau_hat_test, 1, mean)
-sqrt(mean((tau_x[test_inds] - test_mean)^2))
+test_tau_mean <- rowMeans(bcf_model_warmstart$tau_hat_test)
+sqrt(mean((tau_test - test_tau_mean)^2))
+test_outcome_mean <- rowMeans(bcf_model_warmstart$y_hat_test)
+sqrt(mean((y_test - test_outcome_mean)^2))
 ```
 
 #### Warmstart, covariate subset in $\tau(X)$
@@ -1012,14 +1041,14 @@ Here we simulate from the model with the warm-start sampler, using only covariat
 ```{r}
 num_gfr <- 10
 num_burnin <- 0
-num_mcmc <- 1000
+num_mcmc <- 100
 num_samples <- num_gfr + num_burnin + num_mcmc
 bcf_model_warmstart <- bcf(
     X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train, 
     X_test = X_test, Z_test = Z_test, pi_test = pi_test, 
     num_gfr = num_gfr, num_burnin = num_burnin, num_mcmc = num_mcmc, 
     sample_sigma_leaf_mu = F, sample_sigma_leaf_tau = F, 
-    keep_vars_tau = c("x1")
+    keep_vars_tau = c("x1"), random_seed = 2
 )
 ```
 
@@ -1032,6 +1061,12 @@ abline(0,1,col="red",lty=3,lwd=3)
 plot(rowMeans(bcf_model_warmstart$tau_hat_test), tau_test, 
      xlab = "predicted", ylab = "actual", main = "Treatment effect")
 abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_warmstart$y_hat_test), y_test, 
+     xlab = "predicted", ylab = "actual", main = "Outcome")
+abline(0,1,col="red",lty=3,lwd=3)
+plot(rowMeans(bcf_model_warmstart$y_hat_test-bcf_model_warmstart$mu_hat_test), tau_test*Z_test, 
+     xlab = "predicted", ylab = "actual", main = "Treatment effect term")
+abline(0,1,col="red",lty=3,lwd=3)
 sigma_observed <- var(y-E_XZ)
 plot_bounds <- c(min(c(bcf_model_warmstart$sigma2_samples, sigma_observed)), 
                  max(c(bcf_model_warmstart$sigma2_samples, sigma_observed)))
@@ -1055,8 +1090,10 @@ mean(cover)
 And test set RMSE
 
 ```{r}
-test_mean <- apply(bcf_model_warmstart$tau_hat_test, 1, mean)
-sqrt(mean((tau_x[test_inds] - test_mean)^2))
+test_tau_mean <- rowMeans(bcf_model_warmstart$tau_hat_test)
+sqrt(mean((tau_test - test_tau_mean)^2))
+test_outcome_mean <- rowMeans(bcf_model_warmstart$y_hat_test)
+sqrt(mean((y_test - test_outcome_mean)^2))
 ```
 
 # Continuous Treatment