## ---- include = FALSE--------------------------------------------------------- library(drcarlate) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----------------------------------------------------------------------------- # Parameter dgpflag declares three ways to generate random data. # FuncDGP will generate random data according to the method specified in (i), (ii) or (iii) when dgpflag = 1, 2 or 3 respectively. # Parameter rndflag declares four ways to randomly assign treatment effects. # rndflag = 1 - SRS; rndflag = 2 - WEI; rndflag = 3 - BCD; rndflag = 4 - SBR # Note that CovadpRnd is built into FuncDGP, so it is not necessary to use CovadpRnd alone in the actual operation of generating random data # Let's take dgpflag=1 and rndflag=2 for example random_dgp <- FuncDGP(dgptype = 1, rndflag = 2, n = 100, g = 4, pi = c(0.5, 0.5, 0.5, 0.5)) # We can see that the return value of FuncDGP is a list of nine matrices, We can easily extract what we need from it Y <- random_dgp$Y X <- random_dgp$X S <- random_dgp$S A <- random_dgp$A Y1 <- random_dgp$Y1 Y0 <- random_dgp$Y0 D1 <- random_dgp$D1 D0 <- random_dgp$D0 D <- random_dgp$D ## ----------------------------------------------------------------------------- # compute estimated LATE tauhat <- tau(muY1 = Y1, muY0 = Y0, muD1 = D1, muD0 = D0, A = A, S = S, Y = Y, D = D) #compute estimated treatment assignment probabilities pihat(A = A, S = S) # compute estimated standard deviation stanE(muY1 = Y1, muY0 = Y0, muD1 = D1, muD0 = D0, A = A, S = S, Y = Y, D = D, tauhat = tauhat) ## ---- message=FALSE,results='hide'-------------------------------------------- # let's take dgpflag = 1 for example. true_value <- TrueValue(dgptype = 1, vIdx = 1:4, n = 100, g = 4, pi = c(0.5, 0.5, 0.5, 0.5)) true_tau <- true_value$tau ## ----------------------------------------------------------------------------- # SRS - WEI - BCD - SBR true_tau ## ----------------------------------------------------------------------------- # remember that we set dgpflag = 1 and rndflag = 2 before LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = 4, modelflag = 1, iridge = 0.001) ## ----------------------------------------------------------------------------- # set random seed set.seed(1) ## ---- results='hide'---------------------------------------------------------- # get true tau true_tau <- TrueValue(dgptype = 1, vIdx = 1:4, n = 1000, g = 4, pi = c(0.5, 0.5, 0.5, 0.5)) ## ---- warning=FALSE----------------------------------------------------------- # see the output: size, iPert = 0 Output(ii = 1, tau = tauhat[1], dgptype = 1, rndflag = 1, n = 1000, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 0, iq = 0.05, iridge = 0.001) # see the output: power, iPert = 1 Output(ii = 1, tau = tauhat[1], dgptype = 1, rndflag = 1, n = 1000, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 1, iq = 0.05, iridge = 0.001) ## ----------------------------------------------------------------------------- # For example, if we wanted to get the results shown in Panel A in Table 1, we could run the following two functions separately. # First of all, There are four strata data (g = 4), the probability of data being treated at each strata is equal to 0.5 (pi = c(0.5, 0.5, 0.5, 0.5)), the random data generation process follows the data generation process 1 (DGP = 1), the total sample size is 200 (n = 200), run 10,000 Monte Carlo simulations (iMonte = 10000), the confidence level for hypothesis testing is 5%, and finally,get the size of the Monte Carlo simulation (iPert = 0). # Time Consumeing. This command will take about 1.4 hours to execute, so do not run it unless necessary. # JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 0, iq = 0.05, iridge = 0.001) # Second of all, There are four strata data (g = 4), the probability of data being treated at each strata is equal to 0.5 (pi = c(0.5, 0.5, 0.5, 0.5)), the random data generation process follows the digital generation process 1 (DGP = 1), the total sample size was 200 (n = 200), run 10,000 Monte Carlo simulations (iMonte = 10000), the confidence level for hypothesis testing is 5%, and finally,get the power of the Monte Carlo simulation (iPert = 1). # Time Consumeing. This command will take about 1.4 hours to execute, so do not run it unless necessary. # JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 1, iq = 0.05, iridge = 0.001) ## ----------------------------------------------------------------------------- # With the above Settings in mind, we get the following JLTZ function. # JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5,0.5,0.5,0.5), iPert = 0,iq = 0.05, iridge = 0.001) # Since it takes about 1.4 hours to run this function, we'll give you the results directly that users can check them out themselves. # vProb_d1 vProb_d2 vProb_d3 vProb_d4 # [1,] 0.03139898 0.03548546 0.03071509 0.03139898 # [2,] 0.04356403 0.04189256 0.03951368 0.04356403 # [3,] 0.04307085 0.04238541 0.03919373 0.04307085 # [4,] 0.05622226 0.04632824 0.05327148 0.05622226 # [5,] 0.10833470 0.08854937 0.09486482 0.10833470 # [6,] NaN NaN NaN NaN # [7,] 0.03435805 0.03334976 0.03455447 0.03435805 # [8,] 0.04586553 0.05076392 0.05087186 0.04586553 ## ----------------------------------------------------------------------------- # Set up ------------------------------------------------------------------ library(drcarlate) library(pracma) # Load data --------------------------------------------------------------- # data_for_final.csv add covariates b_total_income log_b_total_income b_exp_total_30days # edu_index b_asset_index b_health_index to data_for_tab4.csv data_table <- drcarlate::data_table data1 <- as.matrix(data_table) colnames(data1) <- NULL # create S for wave 1: number of stratnum, total 41 stratnum n <- size(data1,1) S <- zeros(n,1) for (i in 1:41) { S[data1[, size(data1,2)-i+1] == 1] <- 41 - i + 1 } data1 <- cbind(data1, S) # create Y, X, S, D: number of outcomes considered is 9: # save the results for NA, TSLS, L, NL, F vtauhat <- NaN * ones(8,5) vsighat <- NaN * ones(8,5) n_vec <- NaN * ones(8,1) iridge = 0.01 #tuning parameter for in ridge regression for (i in 1:8) { if (i <= 4) { # data_used: 1st col- Y, 2nd col- A, 3rd col- D, 4~(end-1)th col- X, # last col is strata number data_used <- cbind(data1[, 4+(i-1)*3], data1[, 2:3], data1[, 5:6], data1[, size(data1,2)-1-41], data1[, size(data1,2)]) } else { data_used <- cbind(data1[, 4+(i-1)*3], data1[,2:3], data1[, (4+(i-1)*3+1):(4+(i-1)*3+2)], data1[, size(data1,2)-1-41], data1[, size(data1,2)]) } # delete the outcome variables with NaN data_used <- data_used[!is.nan(data_used[,1]), ] # check whether a strata has obs less than 10 for (s in 1:41) { if (length(data_used[data_used[, size(data_used,2)] == s,1]) < 10) { # delete strata has obs less than 10 data_used[data_used[, size(data_used,2)] ==s, ] <- matrix() data_used <- data_used[!is.na(data_used[,1]),] } } # update n n <- size(data_used,1) n_vec[i] <- n # find the unique value for stratnum stratnum <- sort(base::unique(data_used[, size(data_used,2)])) # create A A <- matrix(data_used[, 2]) # create D D <- matrix(data_used[, 3]) # create Y Y <- matrix(data_used[, 1]) # create S S <- matrix(data_used[, size(data_used,2)]) # create X # use baseline total income X <- data_used[,4:6] # make a index print(stringr::str_c("Now i equals to ", i, " !")) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %% No adjustment (z) NA #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% muY0_z <- zeros(n,1) muY1_z <- zeros(n,1) muD0_z <- zeros(n,1) muD1_z <- zeros(n,1) vtauhat[i,1] <- tau(muY1 = muY1_z, muY0 = muY0_z, muD1 = muD1_z, muD0 = muD0_z, A = A, S = S, Y = Y, D = D, stratnum = stratnum) vsighat[i,1] <- stanE(muY1 = muY1_z, muY0 = muY0_z, muD1 = muD1_z, muD0 = muD0_z, A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,1], stratnum = stratnum)/sqrt(n) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %% TSLS #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% mS_iv <- zeros(n, length(stratnum)) for (j in 1:length(stratnum)) { s <- stratnum[j] mS_iv[S==s, j] <- 1 } mX_iv <- cbind(D, X, mS_iv) mZ_iv <- cbind(A, X, mS_iv) K <- size(mX_iv,2) vPara_iv <- inv(t(mZ_iv) %*% mX_iv) %*% t(mZ_iv) %*% Y mE_iv2 <- diag(((Y - mX_iv %*% vPara_iv)^2)[,]) m0_iv <- inv(t(mZ_iv) %*% mX_iv/n) %*% (t(mZ_iv) %*% mE_iv2 %*% mZ_iv/n) %*% inv(t(mX_iv) %*% mZ_iv/n) vtauhat[i,2] <- vPara_iv[1] vsighat[i,2] <- sqrt(m0_iv[1,1])/sqrt(n) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %% Linear+linear model (L) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% muY0_a <- NaN*ones(n,1) muY1_a <- NaN*ones(n,1) muD0_a <- NaN*ones(n,1) muD1_a <- NaN*ones(n,1) for (j in 1:length(stratnum)) { s = stratnum[j] result <- LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = s, modelflag = 1, iridge = iridge) theta_0s_a <- result[["theta_0s"]] theta_1s_a <- result[["theta_1s"]] beta_0s_a <- result[["beta_0s"]] beta_1s_a <- result[["beta_1s"]] muY0_a[S==s] <- X[S==s,] %*% theta_0s_a muY1_a[S==s] <- X[S==s,] %*% theta_1s_a muD0_a[S==s] <- X[S==s,] %*% beta_0s_a muD1_a[S==s] <- X[S==s,] %*% beta_1s_a } vtauhat[i,3] <- tau(muY1 = muY1_a, muY0 = muY0_a, muD1 = muD1_a, muD0 = muD0_a, A = A, S = S, Y = Y, D = D, stratnum = stratnum) vsighat[i,3] <- stanE(muY1 = muY1_a, muY0 = muY0_a, muD1 = muD1_a, muD0 = muD0_a, A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,3], stratnum = stratnum)/sqrt(n) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %% Linear+logistic model (NL) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% muY0_b <- NaN*ones(n,1) muY1_b <- NaN*ones(n,1) muD0_b <- NaN*ones(n,1) muD1_b <- NaN*ones(n,1) for (j in 1:length(stratnum)) { s = stratnum[j] result <- LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = s, modelflag = 2, iridge = iridge) theta_0s_b <- result[["theta_0s"]] theta_1s_b <- result[["theta_1s"]] beta_0s_b <- result[["beta_0s"]] beta_1s_b <- result[["beta_1s"]] muY0_b[S==s] <- X[S==s,] %*% matrix(theta_0s_b[2:length(theta_0s_b)]) + theta_0s_b[1] muY1_b[S==s] <- X[S==s,] %*% matrix(theta_1s_b[2:length(theta_0s_b)]) + theta_1s_b[1] muD0_b[S==s] <- LogisticReg(x = (X[S==s,] %*% matrix(beta_0s_b[-1]) + beta_0s_b[1])) muD1_b[S==s] <- LogisticReg(x = (X[S==s,] %*% matrix(beta_1s_b[-1]) + beta_1s_b[1])) } vtauhat[i,4] <- tau(muY1 = muY1_b, muY0 = muY0_b, muD1 = muD1_b, muD0 = muD0_b, A = A, S = S, Y = Y, D = D, stratnum = stratnum) vsighat[i,4] <- stanE(muY1 = muY1_b, muY0 = muY0_b, muD1 = muD1_b, muD0 = muD0_b, A = A, S = S, Y = Y, D = D,tauhat = vtauhat[i,4], stratnum = stratnum)/sqrt(n) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # %% Further Efficiency Improvement #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% X_c <- cbind(X, muD1_b, muD0_b) muY0_c <- NaN*ones(n,1) muY1_c <- NaN*ones(n,1) muD0_c <- NaN*ones(n,1) muD1_c <- NaN*ones(n,1) for (j in 1:length(stratnum)) { s = stratnum[j] result <- LinearLogit(Y = Y, D = D, A = A, X = X_c, S = S, s = s, modelflag = 1, iridge = iridge) theta_0s_c <- result[["theta_0s"]] theta_1s_c <- result[["theta_1s"]] beta_0s_c <- result[["beta_0s"]] beta_1s_c <- result[["beta_1s"]] muY0_c[S==s] <- X_c[S==s,] %*% theta_0s_c muY1_c[S==s] <- X_c[S==s,] %*% theta_1s_c muD0_c[S==s] <- X_c[S==s,] %*% beta_0s_c muD1_c[S==s] <- X_c[S==s,] %*%beta_1s_c } vtauhat[i,5] <- tau(muY1 = muY1_c, muY0 = muY0_c, muD1 = muD1_c, muD0 = muD0_c, A = A, S = S, Y = Y, D = D, stratnum = stratnum) vsighat[i,5] <- stanE(muY1 = muY1_c, muY0 = muY0_c, muD1 = muD1_c, muD0 = muD0_c, A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,5], stratnum = stratnum)/sqrt(n) } # show the LATE estimates and standard errors in Table 5 of Jiang et al.(2022). # LATE Estimates vtauhat # Standard Errors vsighat