## ----label=setup, include = FALSE--------------------------------------------- library(knitr) loadNamespace("data.table") knitr::opts_chunk$set(collapse = TRUE) ## ----label = "workflow", eval = FALSE, fig.cap = "The series of steps needed to impliment pTITAN2", echo = FALSE, results = "asis"---- # DiagrammeR::grViz(diagram = "digraph flowchart { # node [fontname = arial, shape = oval] # tab1 [label = '@@1'] # tab2 [label = '@@2'] # tab3 [label = '@@3'] # tab4 [label = '@@4'] # tab5 [label = '@@5'] # tab6 [label = '@@6'] # tab7 [label = '@@7'] # tab8 [label = '@@8'] # # tab1 -> tab2 -> tab3 -> tab4 -> tab5 -> tab6 -> tab7 -> tab8; # } # [1]: 'Prepare and import the environmental gradient dataset into R' # [2]: 'Prepare and import the taxonomic dataset into R' # [3]: 'Preprocess raw taxonomic data to determine appropriate taxonomic level\\nof resolution (occurrence function)' # [4]: 'Select columns for the taxon level retuned by occurrence function' # [5]: 'Permut the data across treatment labels to generate list of lists' # [6]: 'Set up cluster for parallele processing (optional)' # [7]: 'Run TITAN2 series on original and permuted data sets' # [8]: 'Analyze probability of exceeding observed diffrence in change-point\\nbetween treatments based on distribution of paired change-point differences' # ") ## ----label = "datasets", echo = FALSE, results = "asis"----------------------- d <- data.table::fread(text = " C_IC_D_06_wID|C_IC_D_06_wID.csv|Environmental Gradient|Chaparral|Dry C_IC_N_06_wID|C_IC_N_06_wID.csv|Environmental Gradient|Chaparral|Normal CD_06_Mall_wID|CD_06_Mall_wID.csv|Taxonomic|Chaparral|Dry CN_06_Mall_wID|CN_06_Mall_wID.csv|Taxonomic|Chaparral|Normal ", header = FALSE, col.names = c("R Data", "csv File", "Data Type", "Region", "Treatment") ) knitr::kable(d, caption = "Example data sets provided in pTITAN2.") ## ----label="list-example-data-sets"------------------------------------------- list.files(system.file("extdata", package = "pTITAN2")) ## ----------------------------------------------------------------------------- data(C_IC_D_06_wID, C_IC_N_06_wID, CD_06_Mall_wID, CN_06_Mall_wID, package = "pTITAN2") str(C_IC_D_06_wID) # Environemntal Gradient, Dry Treatment str(C_IC_N_06_wID) # Environemntal Gradient, Normal Treatment dim(CD_06_Mall_wID) # Taxonomic, Dry Treatment dim(CN_06_Mall_wID) # Taxonomic, Normal Treatment ## ---- include = FALSE--------------------------------------------------------- set.seed(42) # set the random number generator for reproduciblity eg_permutation <- data.table::data.table(site = c("A", "A", "B", "C", "C", "D", "E"), trt0 = c(1, 2, 1, 1, 2, 2, 2)) eg_permutation[, site_n := .N, keyby = .(site)] eg_permutation[site_n > 1, trt1 := sample(trt0), by = .(site)] eg_permutation[site_n == 1, trt1 := sample(trt0)] eg_permutation[site_n > 1, trt2 := sample(trt0), by = .(site)] eg_permutation[site_n == 1, trt2 := sample(trt0)] eg_permutation[site_n > 1, trt3 := sample(trt0), by = .(site)] eg_permutation[site_n == 1, trt3 := sample(trt0)] eg_permutation[site_n > 1, trt4 := sample(trt0), by = .(site)] eg_permutation[site_n == 1, trt4 := sample(trt0)] eg_permutation[, site_n := NULL] ## ----label = "exampleTreatmentPermutation", echo = FALSE, results = "asis"---- knitr::kable(eg_permutation, caption = "Example distribution of sites and perutated treatment labels.", align = "c") ## ----------------------------------------------------------------------------- library(pTITAN2) library(magrittr) loadNamespace("data.table") loadNamespace("dplyr") loadNamespace("tidyr") ## ----------------------------------------------------------------------------- dim(CD_06_Mall_wID) # top 4 rows and first 10 columns CD_06_Mall_wID[1:4, 1:10] ## ----label = "occurrences_example"-------------------------------------------- head(occurrences(CN_06_Mall_wID[, -1])) ## ----------------------------------------------------------------------------- # Tidyverse CN_06_Mall_wID %>% dplyr::select(-StationID) %>% tidyr::pivot_longer(cols = dplyr::everything(), names_to = 'taxon', values_to = 'count') %>% dplyr::mutate( Class = substr(.data$taxon, 1L, 2L), Order = substr(.data$taxon, 3L, 4L), Family = substr(.data$taxon, 5L, 6L), Genus = substr(.data$taxon, 7L, 8L) ) %>% dplyr::group_by(.data$Class, .data$Order, .data$Family, .data$Genus) %>% dplyr::summarise( taxon = unique(.data$taxon), count = sum(.data$count > 0), .groups = "keep" ) %>% dplyr::ungroup() %>% dplyr::arrange(.data$Class, .data$Order, .data$Family, .data$Genus) ## ----------------------------------------------------------------------------- # data.table taxon_count <- data.table::copy(CN_06_Mall_wID) data.table::setDT(taxon_count) data.table::set(taxon_count, j = "StationID", value = NULL) for(j in as.integer(which(sapply(taxon_count, is.integer)))) { data.table::set(taxon_count, j = j, value = as.numeric(taxon_count[[j]])) } taxon_count <- data.table::melt(taxon_count, variable.factor = FALSE, measure.vars = names(taxon_count), variable.name = "taxon") taxon_count[, value := sum(value > 0), keyby = .(taxon)] taxon_count <- unique(taxon_count) data.table::set(taxon_count, j = "Class", value = substr(taxon_count[["taxon"]], 1L, 2L)) data.table::set(taxon_count, j = "Order", value = substr(taxon_count[["taxon"]], 3L, 4L)) data.table::set(taxon_count, j = "Family", value = substr(taxon_count[["taxon"]], 5L, 6L)) data.table::set(taxon_count, j = "Genus", value = substr(taxon_count[["taxon"]], 7L, 8L)) data.table::setkeyv(taxon_count, c("taxon", "Class", "Order", "Family", "Genus")) taxon_count[grepl("^(Ar|Bi).+", taxon)] ## ----------------------------------------------------------------------------- eg <- permute(taxa = list(CD_06_Mall_wID, CN_06_Mall_wID), envs = list(C_IC_D_06_wID, C_IC_N_06_wID), sid = "StationID") str(eg, max.level = 2) ## ----------------------------------------------------------------------------- system.file("example-scripts/permutation_example.R", package = "pTITAN2") ## ----------------------------------------------------------------------------- permutation_example ## ---- results = "hide"-------------------------------------------------------- # Run TITAN on the observed data # limited to 2 cores for CRAN policies, end users can use more cores # the following uses the tidyverse dialect, load and attach the magrittr # namespace, use dplyr and tidyr namespaces explicitly. library(magrittr) CD_obs <- TITAN2::titan( env = C_IC_D_06_wID[["ImpCover"]] # chaparral envgrad dry , txa = subset(CD_06_Mall_wID, select = occurrences(CD_06_Mall_wID[, -1], n = 6L)$taxon) , ncpus = 2 , numPerm = 50 , nBoot = 50 ) CN_obs <- TITAN2::titan( env = C_IC_N_06_wID[["ImpCover"]] # chaparral envgrad dry , txa = subset(CN_06_Mall_wID, select = occurrences(CN_06_Mall_wID[, -1], n = 6L)$taxon) , ncpus = 2 , numPerm = 50 , nBoot = 50 ) ## ----------------------------------------------------------------------------- # create a table of the median change point from TITAN for calculating the p-values TITAN_med <- rbind( cbind(as.data.frame(CD_obs$sumz), "run" = "Tr1_CD") , cbind(as.data.frame(CN_obs$sumz), "run" = "Tr2_CN") ) TITAN_med[["sumz"]] <- sub("(.+)(\\d)$", "\\1", rownames(TITAN_med)) TITAN_med <- TITAN_med %>% dplyr::select(run, sumz, `0.50`) %>% tidyr::pivot_wider(names_from = run, values_from = "0.50") %>% dplyr::mutate(T1T2_abs = abs(Tr1_CD - Tr2_CN)) %>% print ## ----------------------------------------------------------------------------- # Create a summary table of the permutation data tr1_filt <- permutation_example %>% dplyr::select(permutation, `trt1cpsumz-`, `trt1cpsumz+`) %>% tidyr::pivot_longer(cols = `trt1cpsumz-`:`trt1cpsumz+`, values_to = "Tr1_CD", names_to = "sumz") tr1_filt[which(tr1_filt$sumz == "trt1cpsumz-"), 2] <- "fsumz-" tr1_filt[which(tr1_filt$sumz == "trt1cpsumz+"), 2] <- "fsumz+" tr2_filt <- permutation_example %>% dplyr::select(permutation, `trt2cpsumz-`, `trt2cpsumz+`) %>% tidyr::pivot_longer(cols = `trt2cpsumz-`:`trt2cpsumz+`, values_to = "Tr2_CN", names_to = "sumz") tr2_filt[which(tr2_filt$sumz == "trt2cpsumz-"), 2] <- "fsumz-" tr2_filt[which(tr2_filt$sumz == "trt2cpsumz+"), 2] <- "fsumz+" # mutate the data and create a summary table for calculating p-values out_perm <- dplyr::left_join(tr1_filt, tr2_filt) %>% dplyr::mutate(T1T2_abs = abs(Tr1_CD - Tr2_CN)) %>% dplyr::select(-permutation) # Calulate the p-values dplyr::tibble(treatment = "T1T2_CDCN", "fsumz-" = (sum(( dplyr::filter(out_perm, sumz == "fsumz-")$T1T2_abs > (dplyr::filter(TITAN_med, sumz == "fsumz-")$T1T2_abs) )) + 1) / 1001, "fsumz+" = (sum(( dplyr::filter(out_perm, sumz == "fsumz+")$T1T2_abs > (dplyr::filter(TITAN_med, sumz == "fsumz+")$T1T2_abs) )) + 1) / 1001)