## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=6, fig.height=4 ) ## ----warning=FALSE, message=FALSE--------------------------------------------- library(nncc) library(survival) library(dplyr) library(ggplot2) data(anifood) ## ----------------------------------------------------------------------------- dim(anifood) ## ----------------------------------------------------------------------------- # exposures of interest. In a real study, this list can be much longer exp_interest <- c("exp01","exp09", "exp27") # exposures to be controlled for any exposure of interest exp_match <- setdiff(names(anifood), "case") # variables to be excluded from matching for each exposure # both exp_var and rm_vars are character variables excl_vars %>% head ## ----------------------------------------------------------------------------- threshold_results <- get_threshold(anifood, exp_match, p_threshold = 0.50) ## ----warning=FALSE------------------------------------------------------------ distance_density_plot(threshold_results) + ggtitle("Example of distance_density_plot") ## ----------------------------------------------------------------------------- threshold_model_plot(threshold_results, p_threshold = 0.50) + ggtitle("Example of threshold_model_plot") ## ----------------------------------------------------------------------------- # create a variable (i.e., pair) indicating originally matched pairs anifood_matched <- anifood %>% group_by(case) %>% mutate(pair = seq_along(case)) %>% ungroup ## ----------------------------------------------------------------------------- p <- original_compare_plot(anifood_matched, case, pair, threshold_results) # the density plot of distance between originally matched cases and controls p$plot_density + ggtitle("Example of original_compare_plot") # proportion of originally matched cases and controls with a distance greater than the threshold p$prop_distance_gt_threshold ## ----warning=FALSE, message=FALSE--------------------------------------------- library(furrr) strata1 <- future_map(exp_interest, make_knn_strata, rmvars = excl_vars, matchvars = exp_match, df = anifood) %>% setNames(exp_interest) ## ----warning=FALSE------------------------------------------------------------ length(strata1) == length(exp_interest) # rows in a matched data set all.equal(anifood %>% filter(case == 1) %>% NROW %>% `*`(250 + 1), NROW(strata1[[1]])) ## ----warning = FALSE---------------------------------------------------------- strata2 <- future_map(exp_interest, make_analysis_set, stratified_data = strata1, data = anifood, maxdist = threshold_results$threshold) %>% setNames(exp_interest) ## ----message=FALSE------------------------------------------------------------ strata3 <- finalize_data(strata2) ## ----------------------------------------------------------------------------- # exposures to which neither cases nor controls were exposed strata3 %>% lapply(function(dfm) dfm %>% mutate(case = as.character(case), exp = as.character(exp)) %>% filter(case != "" & exp != "") %>% with(table(case, exp))) %>% lapply(function(x) x==0) %>% lapply(function(x) {sum = sum(x); length = length(x); cbind(sum, length)}) %>% do.call(rbind,.) %>% as.data.frame %>% mutate(var = names(strata3)) %>% filter(sum >= 2 | length < 4) %>% select(var) %>% unclass %>% unlist -> expvars_invalid expvars_invalid # none exposed and odds ratio cannot be estimated strata3[["exp09"]] %>% with(table(case, exp)) data_final <- strata3[setdiff(names(strata3), expvars_invalid)] ## ----------------------------------------------------------------------------- or_mh <- future_map(data_final, function(dfm) with(dfm, test_mh(case = case, exp = exp, strata = strata))) or_mh[["exp01"]] ## ----------------------------------------------------------------------------- data_final[["exp27"]] %>% select(case, exp) %>% table or_mh[["exp27"]] ## ----warning=FALSE------------------------------------------------------------ results_clogit <- future_map(data_final, function(dfm){clogit(case ~ exp + strata(strata) , data = dfm)}) results_clogit[["exp27"]] %>% summary() %>% `$`(conf.int) ## ----warning=FALSE, eval=FALSE------------------------------------------------ # library(rstanarm) # # results_stan_clogit <- future_map(data_final, function(dfm) { # dfm$strata <- factor(dfm$strata) # stan_clogit(case ~ exp, # data = dfm, # strata = strata, # # rstanarm suggests us specifying priors even if they are the # # defaults because they might change in the future # prior = normal(0, 4), # #default: prior = normal(autoscale = TRUE), # prior_covariance = decov(), # iter = 100, # chains = 2, # prior_PD = FALSE)}) # for speed only ## ----warning=FALSE------------------------------------------------------------ library(logistf) # regression coefficients coef_logistf <- future_map(data_final, function(dfm){ if(length(unique(dfm[["strata"]])) == 1) { x <- model.matrix(case ~ exp, data = dfm) plconf <- grep("exp", colnames(x)) o <- logistf(case ~ exp, data = dfm, plconf = plconf) %>% `[`(c("terms", "coefficients", "ci.lower", "ci.upper", "prob", "call", "loglik", "model")) } else { x <- model.matrix(case ~ exp + strata, data = dfm) plconf <- grep("exp", colnames(x)) o <- logistf(case ~ exp + strata, data = dfm, plconf = plconf) %>% `[`(c("terms", "coefficients", "ci.lower", "ci.upper", "prob", "call", "loglik", "model")) } }, .progress = TRUE) # odds ratios or_logistf <- lapply(names(coef_logistf), function(var_name){ coef_logistf[[var_name]] %>% `[`(c("terms", "coefficients", "ci.lower", "ci.upper", "prob")) %>% bind_cols %>% filter(grepl("exp", terms)) %>% mutate(variable = var_name, or = exp(coefficients), ci.lower = exp(ci.lower), ci.upper = exp(ci.upper)) %>% select(variable, terms, or, ci.lower, ci.upper, prob)}) %>% setNames(names(coef_logistf)) # odds ratio for exp27 or_logistf[["exp27"]] ## ----------------------------------------------------------------------------- # prepare a data frame for calculating PAF df_or <- bind_rows(or_logistf) df_or ## ----message=FALSE------------------------------------------------------------ # point estimate of PAF paf <- get_paf(df_or = df_or, which_or = or, exp_var = variable, exp_level = terms, df_matched = data_final) paf # lower confidence limit of PAF paf_ci.lower <- get_paf(df_or = df_or, which_or = ci.lower, exp_var = variable, exp_level = terms, df_matched = data_final) ## ----eval = FALSE------------------------------------------------------------- # strata1 <- cacheit("abc", # future_map(exp_interest, make_knn_strata, rmvars = excl_vars, matchvars = exp_match, df = anifood) %>% # setNames(exp_interest), # clearcache = FALSE) ## ----eval = FALSE------------------------------------------------------------- # library(nncc) # library(dplyr) # library(furrr) # library(future.batchtools) # # the workers argument is used to define the number of cores for the analysis. By default, all cores will be used. # plan(multisession, workers = 3) # # strata1 <- future_map(exp_interest, make_knn_strata, rmvars = excl_vars, matchvars = exp_match, df = anifood) %>% # setNames(exp_interest) # ## ----eval=FALSE, comment = ""------------------------------------------------- # #!/bin/bash -l # # # name of the job is helloR # #$ -N helloR # # #$ -cwd # # #$ -V # # #$ -pe smp 2-12 # # #$ -q all.q # # module load R/3.6.2 # # # to execute fugure-map.R # Rscript future-map.R # # exit 0 ## ----eval=FALSE--------------------------------------------------------------- # library(furrr) # library(future.batchtools) # # plan(batchtools_sge) # # future_map(, function(x){...}, .progress = TRUE)