## -----------------------------------------------------------------------------
pckgs <- c("knitr", "tidyverse", "ggalt", "cobalt", "ggsci",
    "modEvA", "naniar", "DT", "Hmisc", "hrbrthemes", "summarytools", 
     "survival", "causalCmprsk") # "kableExtra",


## ----cran_packages0, echo=TRUE, include=TRUE, message=FALSE, warning=FALSE----
ix=NULL
for (package in pckgs)
  if (!requireNamespace(package, quietly = TRUE)) ix <- c(ix, package)
if (length(ix)!=0)
{
pr <- "WARNING: Packages: "
for (i in ix) 
  pr <- paste(pr, i, sep="")
pr <- paste(pr, "\nhave to be installed in order to successfully compile the rest of the vignette. Please install these packages and compile again.", sep="")
cat(pr)
knitr::knit_exit()
}

## ----cran_packages, echo=TRUE, include=TRUE, message=FALSE, warning=FALSE-----
for (package in pckgs)
  library(package, character.only=TRUE) 

opts_chunk$set(results = 'asis',      # Can also be set at the chunk-level
                 comment = NA,
                 prompt  = FALSE,
                 cache   = FALSE)
summarytools::st_options(plain.ascii = FALSE,        # Always use this option in Rmd documents
          style        = "rmarkdown",  # Always use this option in Rmd documents
          footnote     = NA,           # Makes html-rendered results more concise
          subtitle.emphasis = FALSE)   # Improves layout with some rmardown themes

## ----code1, echo=TRUE---------------------------------------------------------
Hmisc::getHdata(rhc) # loading data using the Hmisc package
rhc_raw <- rhc

## -----------------------------------------------------------------------------
miss.report <- rhc_raw %>% miss_var_summary()
kable(miss.report[1:10,]) # %>% kable_styling(bootstrap_options = "striped", full_width = F)

## -----------------------------------------------------------------------------
rhc_cleaning1 <- rhc_raw %>%
  mutate(RHC = as.numeric(swang1 == "RHC"), 
         trt=swang1,
         E = ifelse(is.na(dthdte), 1, ifelse(dschdte==dthdte, 2, 1)), 
         Time = dschdte - sadmdte, #=time-to-"Discharge" (either alive or due to death in a hospital)
         T.death = ifelse(is.na(dthdte), lstctdte - sadmdte, dthdte - sadmdte), #=min(Time to death before or after discharge, Time to a last follow-up visit)
         D = ifelse(death =="No", 0, 1), # death indicator
         sex_Male = as.numeric(sex == "Male"),
         race_black = as.numeric(race == "black"),
         race_other = as.numeric(race == "other"),
         income_11_25K = as.numeric(income == "$11-$25k"),
         income_25_50K = as.numeric(income == "$25-$50k"),
         income_50K = as.numeric(income == "> $50k"),
         ninsclas_Private_Medicare = as.numeric(ninsclas == "Private & Medicare"),
         ninsclas_Medicare = as.numeric(ninsclas == "Medicare"),
         ninsclas_Medicare_Medicaid = as.numeric(ninsclas == "Medicare & Medicaid"),
         ninsclas_Medicaid = as.numeric(ninsclas == "Medicaid"),
         ninsclas_No_Insurance = as.numeric(ninsclas == "No insurance"),
# combine cat1 with cat2, i.e the primary disease category with the secondary disease category:         
         cat_CHF = as.numeric(cat1 == "CHF" | (!is.na(cat2))&(cat2 == "CHF") ),
         cat_Cirrhosis = as.numeric(cat1 == "Cirrhosis" | (!is.na(cat2))&(cat2 == "Cirrhosis")),
         cat_Colon_Cancer = as.numeric(cat1 == "Colon Cancer" | (!is.na(cat2))&(cat2 == "Colon Cancer")),
         cat_Coma = as.numeric(cat1 == "Coma" | (!is.na(cat2))&(cat2 == "Coma")),
         cat_COPD = as.numeric(cat1 == "COPD" | (!is.na(cat2))&(cat2 == "COPD")),
         cat_Lung_Cancer = as.numeric(cat1 == "Lung Cancer" | (!is.na(cat2))&(cat2 == "Lung Cancer")),
         cat_MOSF_Malignancy = as.numeric(cat1 == "MOSF w/Malignancy" | (!is.na(cat2))&(cat2 == "MOSF w/Malignancy")),
         cat_MOSF_Sepsis = as.numeric(cat1 == "MOSF w/Sepsis" | (!is.na(cat2))&(cat2 == "MOSF w/Sepsis")),
         dnr1_Yes = as.numeric(dnr1 == "Yes"),
         card_Yes = as.numeric(card == "Yes"),
         gastr_Yes = as.numeric(gastr == "Yes"),
         hema_Yes = as.numeric(hema == "Yes"),
         meta_Yes = as.numeric(meta == "Yes"),
         neuro_Yes = as.numeric(neuro == "Yes"),
         ortho_Yes = as.numeric(ortho == "Yes"),
         renal_Yes = as.numeric(renal == "Yes"),
         resp_Yes = as.numeric(resp == "Yes"),
         seps_Yes = as.numeric(seps == "Yes"),
         trauma_Yes = as.numeric(trauma == "Yes"),
         ca_Yes = as.numeric(ca == "Yes"),
         ca_Metastatic = as.numeric(ca == "Metastatic")
  )

# variables selection and data reordering:
rhc_full <- rhc_cleaning1 %>% 
  select(ptid, RHC, trt, Time, T.death, E, D, sex_Male, 
         age, edu, race_black, race_other, income_11_25K, income_25_50K, income_50K,
         ninsclas_Private_Medicare, ninsclas_Medicare, ninsclas_Medicare_Medicaid,
         ninsclas_Medicaid, ninsclas_No_Insurance, 
         cat_CHF, cat_Cirrhosis, cat_Colon_Cancer, cat_Coma, cat_COPD, cat_Lung_Cancer, 
         cat_MOSF_Malignancy, cat_MOSF_Sepsis,
         # diagnoses:
         dnr1_Yes, card_Yes, gastr_Yes, hema_Yes, meta_Yes, neuro_Yes, ortho_Yes, renal_Yes, 
         resp_Yes, seps_Yes, trauma_Yes,
         ca_Yes, ca_Metastatic,
         # lab tests:
         wtkilo1, hrt1, meanbp1, resp1, temp1,
         aps1, das2d3pc, scoma1, 
         surv2md1, alb1, bili1, crea1, hema1, paco21, 
         pafi1, ph1, pot1, sod1, wblc1,
         # all variables with "hx" are preexisting conditions: 
         amihx, cardiohx, chfhx, chrpulhx, dementhx, 
         gibledhx, immunhx, liverhx, malighx, psychhx, 
         renalhx, transhx, 
         death, sadmdte, dschdte, dthdte, lstctdte)

## -----------------------------------------------------------------------------
# omit 1 obs with missing discharge date for the length-of-stay analysis:
rhc <- rhc_full[!is.na(rhc_raw$dschdte),]
rhc_full$T.death.30 <- pmin(30, rhc_full$T.death)
rhc_full$D.30 <- rhc_full$D*ifelse(rhc_full$T.death <=30, 1, 0)

## ---- echo=TRUE---------------------------------------------------------------
E <- as.factor(rhc$E)
levels(E) <- c("discharge", "in-hospital death")
t <- addmargins(table(E, useNA="no"))
kable(t) # %>% kable_styling(bootstrap_options = "striped", full_width = F)

## ---- echo=TRUE---------------------------------------------------------------
D <- as.factor(rhc_full$D)
levels(D) <- c("censored", "died")
t <- addmargins(table(D, useNA="no"))
kable(t) # %>% kable_styling(bootstrap_options = "striped", full_width = F)

## ---- echo=TRUE---------------------------------------------------------------
D.30 <- as.factor(rhc_full$D.30)
levels(D.30) <- c("censored", "died")
t <- addmargins(table(D.30, useNA="no"))
kable(t) #%>% kable_styling(bootstrap_options = "striped", full_width = F)

## ---- echo=TRUE---------------------------------------------------------------
t <- addmargins(table(rhc_full$trt, useNA="no"))
kable(t) #%>% kable_styling(bootstrap_options = "striped", full_width = F)

## -----------------------------------------------------------------------------
covs.names <- c("age", "sex_Male", "edu", "race_black", "race_other",
                "income_11_25K", "income_25_50K", "income_50K", 
                "ninsclas_Private_Medicare", "ninsclas_Medicare", "ninsclas_Medicare_Medicaid",
                "ninsclas_Medicaid", "ninsclas_No_Insurance",
                "cat_CHF", "cat_Cirrhosis", "cat_Colon_Cancer", "cat_Coma", "cat_COPD", "cat_Lung_Cancer", 
                "cat_MOSF_Malignancy", "cat_MOSF_Sepsis", 
                "dnr1_Yes", "wtkilo1", "hrt1", "meanbp1", 
                "resp1", "temp1", 
                "card_Yes", "gastr_Yes", "hema_Yes", "meta_Yes", "neuro_Yes", "ortho_Yes", 
                "renal_Yes", "resp_Yes", "seps_Yes", "trauma_Yes", 
                "ca_Yes", "ca_Metastatic",
                "amihx", "cardiohx", "chfhx", "chrpulhx",
                "dementhx", "gibledhx", "immunhx", "liverhx", 
                "malighx", "psychhx", "renalhx", "transhx",
                "aps1", "das2d3pc", "scoma1", "surv2md1",
                "alb1", "bili1", "crea1", "hema1", "paco21", "pafi1", 
                "ph1", "pot1", "sod1", "wblc1")


## -----------------------------------------------------------------------------
form.txt <- paste0("RHC", " ~ ", paste0(covs.names, collapse = "+"))
trt.formula <- as.formula(form.txt)
ps.stab.ATE <- get.weights(formula=trt.formula, data=rhc, wtype = "stab.ATE") 

## ---- fig.align="center", fig.cap="Visual test of goodness-of-fit of a PS model and Hosmer-Lemeshow test.", fig.height=5, fig.width=5----
gof <- HLfit(obs=rhc$RHC, pred=ps.stab.ATE$ps, n.bins=50, bin.method = "quantiles", plot.bin.size=FALSE)

## ----label=better, fig.align="center", fig.cap="Checking goodness-of-fit of a PS model with selection of significant covariates.", fig.height=5, fig.width=5----
ind <- ps.stab.ATE$summary.glm$coefficients[,4]<0.05
form.txt <- paste0("RHC", " ~ ", paste0(covs.names[ind], collapse = "+"))
trt.formula.1 <- as.formula(form.txt)
ps.stab.ATE.2 <- get.weights(formula=trt.formula.1, data=rhc, wtype = "stab.ATE")
gof <- HLfit(obs=rhc$RHC, pred=ps.stab.ATE.2$ps, n.bins=50, bin.method = "quantiles", plot.bin.size=FALSE)

## ---- fig.align="center", label=fig9, fig.cap="Checking overlap in PS distributions.", fig.height=5, fig.width=5----
rhc.ps <- rhc %>% mutate(ps = ps.stab.ATE$ps, stab.ATE.w = ps.stab.ATE$w)
ggplot(rhc.ps, aes(x = ps, fill = trt, color=trt)) +  geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+ theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.8, 0.8),
          legend.background=element_rect(fill="transparent"))

## -----------------------------------------------------------------------------
ps.overlap <- get.weights(formula=trt.formula, data=rhc, wtype = "overlap")
rhc.ps <- rhc.ps %>%   mutate(overlap.w = ps.overlap$w)

## ---- fig.align="center", label=fig10, fig.width=6.5, fig.height=8, message=FALSE, warning=FALSE, fig.cap="Testing covariate balance of the PS model with all covariates."----
#fig.asp=1.1, 
covs <- subset(rhc.ps, select = covs.names)
suppressWarnings(love.plot(RHC ~ covs, data=rhc.ps,
              weights = list(overlap=rhc.ps$overlap.w, stab.ATE=rhc.ps$stab.ATE.w), 
              threshold = .1, abs=TRUE, binary = "std", stars="raw", s.d.denom="pooled",
          var.order = "unadjusted",   
          title = "Comparing balancing weights") + scale_fill_npg() + scale_color_npg())

## ----figcompare, label=figcompare, fig.align="center", fig.width=6.5, fig.height=8,  message=FALSE, warning=FALSE, fig.cap="Testing covariate balance of the PS model with selection of significant covariates."----
ps.overlap.2 <- get.weights(formula=trt.formula.1, data=rhc, wtype = "overlap")
rhc.ps <- rhc.ps %>% mutate(ps.2 = ps.stab.ATE.2$ps, stab.ATE.w.2 = ps.stab.ATE.2$w,
                         overlap.w.2 = ps.overlap.2$w)
covs <- subset(rhc.ps, select = covs.names)
suppressWarnings(love.plot(RHC ~ covs, data=rhc.ps,
              weights = list(overlap=rhc.ps$overlap.w.2, stab.ATE=rhc.ps$stab.ATE.w.2), 
              threshold = .1, abs=TRUE, binary = "std", stars="raw", s.d.denom="pooled",
          var.order = "unadjusted",   
          title = "Comparing balancing weights") + scale_fill_npg() + scale_color_npg())

## ---- fig.align="center", out.width=c('33%', '33%', '34%'), fig.show='hold',  message=FALSE, warning=FALSE, fig.cap="Comparing distributions of `aps1` across two treatment arms for: (A) unadjusted, (B) stab.ATE, (C) overlap weights"----
rhc.ps$stab.ATE.w.1 <- rhc.ps$overlap.w.1<- rep(NA, nrow(rhc.ps))
rhc.ps$stab.ATE.w.1[rhc.ps$trt=="RHC"] <-
  rhc.ps$stab.ATE.w[rhc.ps$trt=="RHC"]/sum(rhc.ps$stab.ATE.w[rhc.ps$trt=="RHC"])
rhc.ps$stab.ATE.w.1[rhc.ps$trt=="No RHC"] <-
  rhc.ps$stab.ATE.w[rhc.ps$trt=="No RHC"]/sum(rhc.ps$stab.ATE.w[rhc.ps$trt=="No RHC"])
rhc.ps$overlap.w.1[rhc.ps$trt=="RHC"] <-
  rhc.ps$overlap.w[rhc.ps$trt=="RHC"]/sum(rhc.ps$overlap.w[rhc.ps$trt=="RHC"])
rhc.ps$overlap.w.1[rhc.ps$trt=="No RHC"] <-
  rhc.ps$overlap.w[rhc.ps$trt=="No RHC"]/sum(rhc.ps$overlap.w[rhc.ps$trt=="No RHC"])
ggplot(rhc.ps, aes(x = aps1, fill = trt, color=trt)) + ggtitle("A")+
  geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))
ggplot(rhc.ps, aes(x = aps1, fill = trt, color=trt, weight=stab.ATE.w.1)) + ggtitle("B")+ geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))
ggplot(rhc.ps, aes(x = aps1, fill = trt, color=trt, weight=overlap.w.1)) + ggtitle("C")+  geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))

## ---- fig.align="center", out.width=c('33%', '33%', '34%'), message=FALSE, warning=FALSE, fig.cap="Comparing distributions of `meanbp1` across two treatment arms for: (A) unadjusted, (B) stab.ATE, (C) overlap weights", fig.show='hold'----
ggplot(rhc.ps, aes(x = meanbp1, fill = trt, color=trt)) + ggtitle("A")+
  geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))
ggplot(rhc.ps, aes(x = meanbp1, fill = trt, color=trt, weight=stab.ATE.w.1)) + ggtitle("B")+ geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))
ggplot(rhc.ps, aes(x = meanbp1, fill = trt, color=trt, weight=overlap.w.1)) + ggtitle("C")+  geom_density(alpha = 0.5) + scale_fill_npg() + scale_color_npg()+
  theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.7, 0.7),
          legend.background=element_rect(fill="transparent"))

## ---- label=table1------------------------------------------------------------
d <- rhc.ps[, covs.names]
add.names <- function(x, a) { paste(x,a, sep="")}
# original population:
descr.orig <- round( cbind( 
  apply(d, 2, wtd.mean, weights=rep(1, nrow(d))),
  sqrt(apply(d, 2, wtd.var, weights=rep(1, nrow(d)))), 
  t(apply(d, 2, wtd.quantile, weights=rep(1, nrow(d)), probs=c(.25, .5, .75)))
), dig=2)
descr.ATE <- round( cbind( 
  apply(d, 2, wtd.mean, weights=rhc.ps$stab.ATE.w),
  sqrt(apply(d, 2, wtd.var, weights=rhc.ps$stab.ATE.w)), 
  t(apply(d, 2, wtd.quantile, weights=rhc.ps$stab.ATE.w, probs=c(.25, .5, .75)))
), dig=2)
descr.overlap <- round( cbind( 
  apply(d, 2, wtd.mean, weights=rhc.ps$overlap.w),
  sqrt(apply(d, 2, wtd.var, weights=rhc.ps$overlap.w)), 
  t(apply(d, 2, wtd.quantile, weights=rhc.ps$overlap.w, probs=c(.25, .5, .75)))
), dig=2)
df3 <- data.frame(cbind(descr.orig, descr.ATE, descr.overlap))
row.names(df3) <- covs.names
names(df3) <- rep( c("mean", "sd", "q1", "med", "q3"), times=3)

kable(df3, caption="Comparing marginal distributions of covariates for three populations. q1, med and q3 correspond to 1st, 2nd and 3rd quartiles of covariates distributions for: (1) the original  population; (2) stab.ATE population; (3) overlap population.") 
#%>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>%
#   add_header_above(c("covariate "= 1, "original population" = 5, "stab.ATE population" = 5, "overlap population"=5)) %>%
 # column_spec(2, background = "lightgrey") %>%
#  column_spec(7, background = "lightgrey") %>%
#  column_spec(12, background = "lightgrey") %>%  
 #    scroll_box(width = "100%", height = "500px")

## ----label=ATEwdistr, fig.height=5, fig.width=5, fig.align="center" ,  message=FALSE, warning=FALSE, fig.cap="Distribution of stab.ATE weights."----
ggplot(rhc.ps, aes(x = stab.ATE.w, fill = trt, color=trt)) +  
  geom_histogram(alpha = 0.4, bins=30) + scale_fill_npg() + scale_color_npg()+ theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.8, 0.8),
          legend.background=element_rect(fill="transparent")) + ggtitle("Distribution of stab.ATE weights")

## ---- label=influential-------------------------------------------------------
kable(rhc.ps[rhc.ps$stab.ATE.w>10, c("trt", "stab.ATE.w", "overlap.w", "ps")],
      caption="The list of most influential observations in the stab.ATE weighting.")

## ---- label=overlapdistr, fig.height=5, fig.width=5, fig.align="center" ,  message=FALSE, warning=FALSE, fig.cap="Distribution of overlap weights."----
ggplot(rhc.ps, aes(x = overlap.w, fill = trt, color=trt)) +  
  geom_histogram(alpha = 0.4, bins=30) + scale_fill_npg() + scale_color_npg()+ theme(plot.title = element_text(hjust = 0.5), legend.position = c(0.8, 0.8),
          legend.background=element_rect(fill="transparent")) + ggtitle("Distribution of overlap weights")

## -----------------------------------------------------------------------------
# overlap weights========================:
# Nonparametric estimation:
res.overlap <- fit.nonpar(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="overlap", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50, seed=17, parallel = FALSE) # The small number of bootstrap replications (nbs.rep=50) was chosen for illustration purposes.  
# Cox-based estimation:
res.cox.overlap <- fit.cox(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="overlap", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50, seed=17, parallel = FALSE)

# stab.ATE weights==========================:
# Nonparametric estimation:
res.stab.ATE <- fit.nonpar(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="stab.ATE", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50,
                          seed=17, parallel = FALSE)
# Cox-based estimation:
res.cox.stab.ATE <- fit.cox(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="stab.ATE", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50,
                          seed=17, parallel = FALSE)
# unadjusted analysis========================:
# Nonparametric estimation:
res.un <- fit.nonpar(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="unadj", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50,
                     seed=17, parallel = FALSE)
# Cox-based estimation:
res.cox.un <- fit.cox(df=rhc, X="Time", E="E", trt.formula=trt.formula, 
                          wtype="unadj", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=50,
                      seed=17, parallel = FALSE)

## ----echo=TRUE, message=FALSE, warning=FALSE----------------------------------
get.logCumHR <- function(res, res.cox)
{
  df1 <- rbind( data.frame(time=res$time, Outcome=1,
                      CumHazR=res$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR, 
                      CIL.CumHazR=res$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.L, 
                      CIU.CumHazR=res$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.U),
            data.frame(time=res$time, Outcome=2,
                      CumHazR=res$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR, 
                      CIL.CumHazR=res$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR.CI.L, 
                      CIU.CumHazR=res$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR.CI.U))
  df2 <- data.frame(time=c(min(res.cox$time), max(res.cox$time)), Outcome=rep(3, 2),
                         CumHazR=rep(res.cox$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR, 2),    
                         CIL.CumHazR=rep(NA, 2),
                         CIU.CumHazR=rep(NA, 2))
  df3 <- data.frame(time=c(min(res.cox$time), max(res.cox$time)), Outcome=rep(4, 2),
                         CumHazR=rep(res.cox$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR, 2),  
                         CIL.CumHazR=rep(NA, 2),
                         CIU.CumHazR=rep(NA, 2))
  df <- rbind(df1, df2, df3)
  df$Outcome <- factor(df$Outcome)
  levels(df$Outcome) <- c("Discharge-nonpar", "Death-nonpar", 
                          paste("Discharge-Cox:", round(res.cox$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR,dig=2),
                                ", 95% CI=[",
                                round(res.cox$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.L,dig=2),
                                ",",
                                round(res.cox$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.U,dig=2),
                                "]", sep=""), 
                          paste("Death-Cox:", round(res.cox$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR,dig=2), ", 95% CI=[",
                                round(res.cox$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR.CI.L,dig=2),
                                ",",
                                round(res.cox$trt.eff[[paste("Ev=", 2, sep="")]]$log.CumHazR.CI.U,dig=2),
                                "]", sep=""))
  return(df)
}
logCumHR.overlap <- get.logCumHR(res.overlap, res.cox.overlap)
logCumHR.stab.ATE <- get.logCumHR(res.stab.ATE, res.cox.stab.ATE)
logCumHR.unadj <- get.logCumHR(res.un, res.cox.un)

## ----echo=TRUE, fig.align="center", fig.show='hold', fig.width=4.5, fig.height=4.5,  message=FALSE, warning=FALSE, fig.cap="log(Cum.HR(t)) of RHC vs No-RHC for discharge and in-hospital death."----

plot.logCumHazR <- function(df, title)
{
  p <- ggplot(df, aes(x=time, y=CumHazR, color=Outcome, fill=Outcome, shape=Outcome)) + ggtitle(title) +
    geom_step(size=1.1) + 
      geom_ribbon(aes(ymin=CIL.CumHazR, ymax=CIU.CumHazR), alpha=0.2, stat="stepribbon") +
    scale_fill_npg() + scale_color_npg()
  p <- p + xlab("time from admission to ICU (days)") + ylab("log.CumHR(t)") + 
    theme(axis.text.x = element_text(face="bold", angle=45),
          axis.text.y = element_text(face="bold"), plot.title = element_text(hjust = 0.5))+
    theme(legend.position = c(0.5, 0.3),
          legend.background=element_rect(fill="transparent"),
          panel.grid.minor = element_line(size = .5,colour = "gray92"), 
          panel.grid.major = element_line(size = .5,colour = "#C0C0C0")) +
    geom_vline(xintercept=30, linetype="dashed")
  p
}

plot.logCumHazR(df=logCumHR.overlap, title="Overlap weights") 
plot.logCumHazR(df=logCumHR.stab.ATE, title="Stabilized ATE weights") 
plot.logCumHazR(df=logCumHR.unadj, title="Unadjusted") 

## ----echo=TRUE, message=FALSE, warning=FALSE----------------------------------
get.CumHaz <- function(res1, res2, res3, Ev)
{
  df <- rbind( data.frame(time=res1$time, population=1, TRT=1,
                          CumHaz=res1$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res1$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res1$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U),
              data.frame(time=res2$time, population=2, TRT=1,
                          CumHaz=res2$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res2$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res2$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U),
             data.frame(time=res3$time, population=3, TRT=1,
                          CumHaz=res3$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res3$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res3$trt.1[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U),
             data.frame(time=res1$time, population=1, TRT=0,
                          CumHaz=res1$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res1$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res1$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U),
              data.frame(time=res2$time, population=2, TRT=0,
                          CumHaz=res2$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res2$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res2$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U),
             data.frame(time=res3$time, population=3, TRT=0,
                          CumHaz=res3$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz, 
                          CIL.CumHaz=res3$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.L, 
                          CIU.CumHaz=res3$trt.0[[paste("Ev=", Ev, sep="")]]$CumHaz.CI.U)
             )
  df$population <- factor(df$population)
  levels(df$population) <- c("overlap", "stab.ATE", "unadjusted")
  df
}

df.CumHaz <- list()
E.set <- sort(unique(rhc$E))
E.set <- E.set[E.set!=0] # 0 is for censoring status
for (k in E.set)
  df.CumHaz[[k]] <- get.CumHaz(res.overlap, res.stab.ATE, res.un, k)

plot.CumHaz <- function(df, title, ymax)
{
  p <- ggplot(df, aes(x=time, y=CumHaz, color=population, fill=population, shape=population)) + ggtitle(title) +
    geom_step(size=1.1) + 
 #     geom_ribbon(aes(ymin=CIL.CumHaz, ymax=CIU.CumHaz), alpha=0.2, stat="stepribbon") +
    scale_fill_npg() + scale_color_npg()
  p <- p + xlab("time from admission to ICU (days)") + ylab("Cumulative Hazard (t)") + ylim(0, ymax)+
    theme(axis.text.x = element_text(face="bold", angle=45),
          axis.text.y = element_text(face="bold"), plot.title = element_text(hjust = 0.5))+
    theme(legend.position = c(0.7, 0.3),
          legend.background=element_rect(fill="transparent"),
          panel.grid.minor = element_line(size = .5,colour = "gray92"), 
          panel.grid.major = element_line(size = .5,colour = "#C0C0C0")) +
    geom_vline(xintercept=30, linetype="dashed")
  p
}

## ----echo=TRUE, out.width=c('45%', '45%'), fig.width=4.5, fig.height=4.5, fig.align="center" ,  message=FALSE, warning=FALSE, fig.cap="Cumulative hazards for discharge and in-hospital death. Comparison of 3 target populations.", fig.show='hold'----
plot.CumHaz(df=df.CumHaz[[1]][df.CumHaz[[1]]$TRT==1,], title="RHC: CumHaz of Discharge", ymax=6)
plot.CumHaz(df=df.CumHaz[[1]][df.CumHaz[[1]]$TRT==0,], title="No-RHC: CumHaz of Discharge", ymax=6)
plot.CumHaz(df=df.CumHaz[[2]][df.CumHaz[[2]]$TRT==1,], title="RHC: CumHaz of Death", ymax=2.5)
plot.CumHaz(df=df.CumHaz[[2]][df.CumHaz[[2]]$TRT==0,], title="No-RHC: CumHaz of Death", ymax=2.5)

## ----label=CumHaz4, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,  message=FALSE, warning=FALSE, fig.cap="Cause-specific cumulative hazards.", fig.show='hold'----
df <- rbind(summary(res.stab.ATE, event=1, estimand="CumHaz"),
            summary(res.stab.ATE, event=2, estimand="CumHaz"))
df$Event_TRT <- factor(2*(df$Event==2) + 1*(df$TRT==0))
levels(df$Event_TRT) <- c("Discharge-RHC", "Discharge-No RHC",
                          "In-hospital death-RHC", "In-hospital death-No RHC")

ggplot(df, aes(x=time, y=CumHaz, color=Event_TRT, fill=Event_TRT,
               shape=Event_TRT, linetype=Event_TRT)) + geom_step(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.CumHaz, ymax=CIU.CumHaz), alpha=0.2,
              stat="stepribbon") + scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlab("time from admission to ICU (days)") +
  ylab("Cumulative hazard of event by time t") +
  theme(legend.position = c(0.41, 0.83), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")


## ----label=Risk4, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,  message=FALSE, warning=FALSE, fig.cap="Risk curves", fig.show='hold'----
df <- rbind(summary(res.stab.ATE, event=1, estimand="CIF"),
            summary(res.stab.ATE, event=2, estimand="CIF"))
df$Event_TRT <- factor(2*(df$Event==2) + 1*(df$TRT==0))
levels(df$Event_TRT) <- c("Discharge-RHC", "Discharge-No RHC",
                          "In-hospital death-RHC", "In-hospital death-No RHC")

ggplot(df, aes(x=time, y=CIF, color=Event_TRT, fill=Event_TRT,
               shape=Event_TRT, linetype=Event_TRT)) + geom_step(linewidth=1.1) + xlim(0, 200) +
  geom_ribbon(aes(ymin=CIL.CIF, ymax=CIU.CIF), alpha=0.2,
              stat="stepribbon") + scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlab("time from admission to ICU (days)") +
  ylab("Probability of event by time t") +
  theme(legend.position = c(0.7, 0.25), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")

## ----label=RMT4, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5, message=FALSE, warning=FALSE, fig.cap="Restricted-mean-time-lost/gained due to specific event.", fig.show='hold'----
df <- rbind(summary(res.stab.ATE, event=1, estimand="RMT"),
            summary(res.stab.ATE, event=2, estimand="RMT"))
df$Event_TRT <- factor(2*(df$Event==2) + 1*(df$TRT==0))
levels(df$Event_TRT) <- c("Discharge-RHC", "Discharge-No RHC",
                          "In-hospital death-RHC", "In-hospital death-No RHC")

ggplot(df, aes(x=time, y=RMT, color=Event_TRT, fill=Event_TRT,
               shape=Event_TRT, linetype=Event_TRT)) + geom_line(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.RMT, ymax=CIU.RMT), alpha=0.2) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlab("time from admission to ICU (days)") +
  ylab("average time lost due to death (gained by recovery)") +
  theme(legend.position = c(0.35, 0.8), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")

## ----label=fig16, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,   message=FALSE, warning=FALSE, fig.cap="Risk differences for discharge and in-hospital death.", fig.show='hold'----
df <- rbind(summary(res.stab.ATE, event=1, estimand="RD"),
            summary(res.stab.ATE, event=2, estimand="RD"))
df$Event <- as.factor(df$Event)
levels(df$Event) <- c("Discharge", "In-hospital death")

ggplot(df, aes(x=time, y=RD, color=Event, fill=Event,
               shape=Event, linetype=Event)) + geom_step(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.RD, ymax=CIU.RD), alpha=0.2,
              stat="stepribbon") + scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlab("time from admission to ICU (days)") +
  ylab("Risk difference") + xlim(0, 200) +
  theme(legend.position = c(0.7, 0.6), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")

## ----echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,  message=FALSE, warning=FALSE, fig.cap="Difference in average time lost/gained due to treatment.", fig.show='hold'----
df <- rbind(summary(res.stab.ATE, event=1, estimand="ATE.RMT"),
            summary(res.stab.ATE, event=2, estimand="ATE.RMT"))
df$Event <- as.factor(df$Event)
levels(df$Event) <- c("Discharge", "In-hospital death")

ggplot(df, aes(x=time, y=ATE.RMT, color=Event, fill=Event,
               shape=Event, linetype=Event)) + geom_line(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.ATE.RMT, ymax=CIU.ATE.RMT), alpha=0.2) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlab("time from admission to ICU (days)") +
  ylab("average time (days) lost/gained due to treatment") +
  theme(legend.position = c(0.7, 0.5), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")

## ----echo=TRUE, fig.align="center",  fig.width=5, fig.height=5, message=FALSE, warning=FALSE, fig.cap="Cumulative distribution of length-of-stay in RHC vs No-RHC groups."----
alpha=0.05
# trt.0:
bs.CumPr.0 <- 1-exp(-res.stab.ATE$trt.0[[paste("Ev=", 1, sep="")]]$bs.CumHaz -
                      res.stab.ATE$trt.0[[paste("Ev=", 2, sep="")]]$bs.CumHaz)
CumPr.CIL.0 <- apply(bs.CumPr.0, 2, quantile, prob=alpha/2, na.rm=TRUE)
CumPr.CIU.0 <- apply(bs.CumPr.0, 2, quantile, prob=1-alpha/2, na.rm=TRUE)
# trt.1:
bs.CumPr.1 <- 1-exp(-res.stab.ATE$trt.1[[paste("Ev=", 1, sep="")]]$bs.CumHaz -
                      res.stab.ATE$trt.1[[paste("Ev=", 2, sep="")]]$bs.CumHaz)
CumPr.CIL.1 <- apply(bs.CumPr.1, 2, quantile, prob=alpha/2, na.rm=TRUE)
CumPr.CIU.1 <- apply(bs.CumPr.1, 2, quantile, prob=1-alpha/2, na.rm=TRUE)

df <- rbind(data.frame(time=res.stab.ATE$time, TRT=1,
                       CumPr=1-exp(-res.stab.ATE$trt.1[[paste("Ev=", 1, sep="")]]$CumHaz -
                             res.stab.ATE$trt.1[[paste("Ev=", 2, sep="")]]$CumHaz),
                       CumPr.CIL=CumPr.CIL.1, CumPr.CIU=CumPr.CIU.1),
            data.frame(time=res.stab.ATE$time, TRT=2,
                       CumPr=1-exp(-res.stab.ATE$trt.0[[paste("Ev=", 1, sep="")]]$CumHaz -
                             res.stab.ATE$trt.0[[paste("Ev=", 2, sep="")]]$CumHaz),
                       CumPr.CIL=CumPr.CIL.0, CumPr.CIU=CumPr.CIU.0))
df$TRT <- as.factor(df$TRT)
levels(df$TRT) <- c("RHC", "No-RHC")

ggplot(df, aes(x=time, y=CumPr, color=TRT, fill=TRT, shape=TRT)) +
  geom_step(linewidth=1.1) + scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") + xlim(0,100) +
  xlab("time (days)") + ylab("Cumulative distribution of length-of-stay") +
  geom_ribbon(aes(ymin=CumPr.CIL, ymax=CumPr.CIU), alpha=0.2, stat="stepribbon")+
  theme(axis.text.x = element_text(face="bold", angle=45),
        axis.text.y = element_text(face="bold"), plot.title = element_text(hjust = 0.5))+
  theme(legend.position = c(0.6, 0.3), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"), legend.title=element_blank())+
  geom_vline(xintercept=30, linetype="dashed")

## -----------------------------------------------------------------------------
# nonparametric:
res.30 <- fit.nonpar(df=rhc_full, X="T.death.30", E="D.30", trt.formula=trt.formula,
                     wtype="stab.ATE", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=80,
                     seed=17, parallel = FALSE)
# Cox-based estimation:
res.cox.30 <- fit.cox(df=rhc_full, X="T.death.30", E="D.30", trt.formula=trt.formula,
                      wtype="stab.ATE", cens=0, conf.level=0.95, bs=TRUE, nbs.rep=80,
                      seed=17, parallel = FALSE)

## ----echo=TRUE, fig.align="center",  fig.width=5, fig.height=5, message=FALSE, warning=FALSE, fig.cap="log-ratio of cumulative hazards for 30-day mortality, using stabilized ATE weighting."----
df1.30 <- cbind(summary(res.30, event=1, estimand="logHR") %>%
                      select(time, logCumHazR, CIL.logCumHazR, CIU.logCumHazR), Est.method=1)
HR.30 <- res.cox.30$trt.eff[[paste("Ev=", 1, sep="")]][c("log.CumHazR", "log.CumHazR.CI.L", "log.CumHazR.CI.U")]
df2.30 <- data.frame(time=c(min(res.cox.30$time), max(res.cox.30$time)), logCumHazR=HR.30[[1]],
                  CIL.logCumHazR=NA, CIU.logCumHazR=NA, Est.method=2)
df.30 <- rbind(df1.30, df2.30)
df.30$Est.method <- factor(df.30$Est.method)
levels(df.30$Est.method) <- c("nonpar",
                               paste("Cox: ", round(res.cox.30$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR,dig=2),
                                     ", 95% CI=[",
                                     round(res.cox.30$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.L,dig=2),
                                     ",",
                                     round(res.cox.30$trt.eff[[paste("Ev=", 1, sep="")]]$log.CumHazR.CI.U,dig=2),
                                     "]", sep=""))

ggplot(df.30, aes(x=time, y=logCumHazR, color=Est.method, fill=Est.method, shape=Est.method)) +
  geom_step(linewidth=1.1) + geom_ribbon(aes(ymin=CIL.logCumHazR, ymax=CIU.logCumHazR), alpha=0.2, stat="stepribbon") +
  scale_color_brewer(palette = "Set1") + scale_fill_brewer(palette = "Set1") +
  xlab("time from admission to ICU (days)") + ylim(-0.1, 0.55)+
  ylab("log-ratio of CumHaz(t)") +
  theme(axis.text.x = element_text(face="bold", angle=45),
        axis.text.y = element_text(face="bold"), plot.title = element_text(hjust = 0.5)) +
  theme(legend.position = c(0.6, 0.7),
        legend.title=element_blank(), legend.text=element_text(size=12),
        legend.background=element_rect(fill="transparent"))

## ----label=fig162, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,   message=FALSE, warning=FALSE, fig.cap="Risk difference for 30-day mortality.", fig.show='hold'----
ggplot(summary(res.30, event=1, estimand="RD"),
  aes(x=time, y=RD)) + geom_step(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.RD, ymax=CIU.RD), alpha=0.2,
  stat="stepribbon") + xlab("time from admission to ICU (days)") +
  ylab("Risk difference")

est30 <- get.pointEst(res.30, 30)
cat("30-day risk difference=", round((est30$trt.eff[[1]]$RD), dig=4),
    ", 95% CI=[", round((est30$trt.eff[[1]]$RD.CI.L), dig=4), ", ",
                       round((est30$trt.eff[[1]]$RD.CI.U), dig=4), "]\n", sep="")

## ----label=fig163, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,   message=FALSE, warning=FALSE, fig.cap="Risk ratio for 30-day mortality.", fig.show='hold'----
ggplot(summary(res.30, event=1, estimand="RR"),
  aes(x=time, y=RR)) + geom_step(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.RR, ymax=CIU.RR), alpha=0.2,
  stat="stepribbon") + xlab("time from admission to ICU (days)") +
  ylab("Risk ratio")
cat("30-day risk ratio=", round((est30$trt.eff[[1]]$RR), dig=4),
    ", 95% CI=[", round((est30$trt.eff[[1]]$RR.CI.L), dig=4), ", ",
    round((est30$trt.eff[[1]]$RR.CI.U), dig=4), "]\n", sep="")

## ---- label=figRMT30, echo=TRUE, fig.align="center", fig.width=4.5, fig.height=4.5,  message=FALSE, warning=FALSE, fig.cap="Difference in average time lost due to RHC in the 30-day mortality.", fig.show='hold'----
ggplot(summary(res.30, event=1, estimand="ATE.RMT"),
       aes(x=time, y=ATE.RMT)) + geom_line(linewidth=1.1) +
  geom_ribbon(aes(ymin=CIL.ATE.RMT, ymax=CIU.ATE.RMT), alpha=0.2) +
  xlab("time from admission to ICU (days)") +
  ylab("RMT difference (days)")
cat("30-day RMT difference=", round((est30$trt.eff[[1]]$ATE.RMT), dig=4),
    ", 95% CI=[", round((est30$trt.eff[[1]]$ATE.RMT.CI.L), dig=4), ", ",
    round((est30$trt.eff[[1]]$ATE.RMT.CI.U), dig=4), "]\n", sep="")