## ----set-options, echo=FALSE, cache=FALSE, include=FALSE---------------------- library(knitr) opts_chunk$set(tidy=FALSE, prompt=FALSE, warning=FALSE, message = FALSE) opts_chunk$set(comment="#") ## ---- eval=FALSE-------------------------------------------------------------- # library(devtools) # install_github("multifunc", username="jebyrnes") ## ---- warning=FALSE, results="hide", message=FALSE, error=FALSE--------------- library(multifunc) #for plotting library(ggplot2) library(patchwork) #for data library(tidyr) library(dplyr) library(purrr) library(forcats) #for analysus library(car) ## ----------------------------------------------------------------------------- #Read in data to run sample analyses on the biodepth data data(all_biodepth) allVars<-c("biomassY3", "root3", "N.g.m2", "light3", "N.Soil", "wood3", "cotton3") varIdx<-which(names(all_biodepth) %in% allVars) ## ----------------------------------------------------------------------------- ####### # Now, specify what data we're working with - Germany # and the relevant variables we'll be working with ####### germany<- all_biodepth %>% filter(location=="Germany") # which variables have > 2/3 of the values not NA? german_vars <- whichVars(germany, allVars) # What are the names of species in this dataset # that have at least some values > 0? species <- relevantSp(germany,26:ncol(germany)) spIDX <- which(names(germany) %in% species) #in case we need these ## ----------------------------------------------------------------------------- germanyForPlotting <- germany %>% select(Diversity, german_vars) %>% pivot_longer(cols = german_vars, names_to = "variable", values_to = "value") %>% mutate(variable = factor(variable)) #make the levels of the functions into something nice for plots levels(germanyForPlotting$variable) <- c('Aboveground Biomass', 'Root Biomass', 'Cotton Decomposition', 'Soil Nitrogen', 'Plant Nitrogen') ## ----------------------------------------------------------------------------- germanyLabels <- germanyForPlotting %>% group_by(variable) %>% nest() %>% mutate(fits = map(data, ~lm(value ~ Diversity, data=.x)), stats = map(fits, broom::glance)) %>% unnest(stats) %>% ungroup() %>% mutate(labels = paste0("p = ", round(p.value, 3), ", ", expression(R^2), " = ", round(r.squared, 2)), labels = gsub("p = 0 ", "p < 0.001 ", labels), Diversity = 7, value=c(2000, 1200, 1.5, 6, 20)) ## ---- fig.width=12, fig.height=8---------------------------------------------- ggplot(aes(x=Diversity, y=value),data=germanyForPlotting) + geom_point(size=3)+ facet_wrap(~variable, scales="free") + theme_bw(base_size=15)+ stat_smooth(method="lm", colour="black", size=2) + xlab("Species Richness") + ylab("Value of Function") + geom_text(data=germanyLabels, aes(label=labels)) + theme(panel.grid = element_blank()) ## ----------------------------------------------------------------------------- #pull out only those plots planted in monoculture monoGermany<- germany %>% filter(rowSums(select(., ACHMIL1:VICTET1))==1) %>% #get mono names pivot_longer(ACHMIL1:VICTET1, names_to = "mono") %>% filter(value !=0) %>% select(-value) %>% #pivot by function pivot_longer(german_vars, names_to = "variable") %>% #get the mean for each monoculture group_by(variable, mono) %>% summarize(value = mean(value)) #now, who is the best performer? monoGermany %>% group_by(variable) %>% filter(value == max(value)) ## ----------------------------------------------------------------------------- germany <- germany %>% mutate(N.Soil = -1*N.Soil +max(N.Soil, na.rm=T)) ## ----------------------------------------------------------------------------- spList<-sAICfun("biomassY3", species, germany) spList ## ----------------------------------------------------------------------------- redund<-getRedundancy(german_vars, species, germany) coefs<-getRedundancy(german_vars, species, germany, output="coef") stdCoefs<-stdEffects(coefs, germany, german_vars, species) #for example redund ## ----fig.height=8, fig.width=12----------------------------------------------- #plot the num. functions by fraction of the species pool needed posCurve<-divNeeded(redund, type="positive") posCurve$div<-posCurve$div/ncol(redund) pc<-qplot(nfunc, div, data=posCurve, group=nfunc, geom=c("boxplot"))+ geom_jitter(size=4, position = position_jitter(height=0.001, width = .04))+ ylab("Fraction of Species Pool\nwith Positive Effect\n")+ xlab("\nNumber of Functions")+theme_bw(base_size=24)+ylim(c(0,1)) negCurve<-divNeeded(redund, type="negative") negCurve$div<-negCurve$div/ncol(redund) nc<-qplot(nfunc, div, data=negCurve, group=nfunc, geom=c("boxplot"))+ geom_jitter(size=4, position = position_jitter(height=0.001, width = .04))+ ylab("Fraction of Species Pool\nwith Negative Effect\n")+ xlab("\nNumber of Functions")+theme_bw(base_size=24)+ylim(c(0,1)) #combine these into one plot pc + nc + plot_annotation(tag_levels = 'A') ## ----------------------------------------------------------------------------- allOverlapPos <- map_df(2:nrow(redund), ~getOverlapSummary(redund, m = .x, denom = "set"), .id = "m") allOverlapPos ## ----------------------------------------------------------------------------- posNeg<-data.frame(Species = colnames(redund), Pos = colSums(filterOverData(redund)), Neg = colSums(filterOverData(redund, type="negative"))) #plot it ggplot(aes(x=Pos,y= Neg), data=posNeg) + geom_jitter(position = position_jitter(width = 0.05, height = 0.05), size=5, shape=1) + theme_bw(base_size=18) + xlab("\n# of Positive Contributions per species\n") + ylab("# of Negative Contributions per species\n") + annotate("text", x=0, y=3, label="a)") + stat_smooth(method="glm", colour="black", size=2, method.args = list(family=poisson(link="identity"))) + geom_abline(slope = 1, intercept = 0, lty = 2, color = "red") ## ----------------------------------------------------------------------------- #now get the standardized effect sizes posNeg<-within(posNeg, { stdPosMean <- colSums(filterCoefData(stdCoefs))/Pos stdPosMean[which(is.nan(stdPosMean))] <-0 stdNegMean <- colSums(filterCoefData(stdCoefs, type="negative"))/Neg stdNegMean[which(is.nan(stdNegMean))] <-0 }) ggplot(aes(x=stdPosMean,y= stdNegMean), data=posNeg) + # geom_point(size=3) + geom_jitter(position = position_jitter(width = .02, height = .02), size=5, shape=1) + theme_bw(base_size=18) + xlab("\nAverage Standardized Size of\nPositive Contributions") + ylab("Average Standardized Size of\nNegative Contributions\n") + stat_smooth(method="lm", colour="black", size=2)+ annotate("text", x=0, y=0.25, label="b)") + geom_abline(slope=-1, intercept=0, size=1, colour="black", lty=3) ## ----------------------------------------------------------------------------- #add on the new functions along with the averaged multifunctional index germany<-cbind(germany, getStdAndMeanFunctions(germany, german_vars)) #germany<-cbind(germany, getStdAndMeanFunctions(germany, vars, standardizeZScore)) ## ----------------------------------------------------------------------------- #plot it ggplot(aes(x=Diversity, y=meanFunction),data=germany)+geom_point(size=3)+ theme_bw(base_size=15)+ stat_smooth(method="lm", colour="black", size=2) + xlab("\nSpecies Richness") + ylab("Average Value of Standardized Functions\n") ## ----fig.width=16, fig.height=8----------------------------------------------- #reshape for plotting everything with ggplot2 germanyMeanForPlotting <- germany %>% select(Diversity, biomassY3.std:meanFunction) %>% pivot_longer(cols = c(biomassY3.std:meanFunction), names_to = "variable") %>% mutate(variable = fct_inorder(variable)) #nice names for plotting levels(germanyMeanForPlotting$variable) <- c('Aboveground Biomass', 'Root Biomass', 'Cotton Decomposition', 'Soil Nitrogen', 'Plant Nitrogen', 'Mean Multifuncion Index') #plot it ggplot(aes(x=Diversity, y=value),data=germanyMeanForPlotting)+geom_point(size=3)+ facet_grid(~variable) + theme_bw(base_size=15)+ stat_smooth(method="lm", colour="black", size=2) + xlab("\nSpecies Richness") + ylab("Standardized Value of Function\n") ## ----------------------------------------------------------------------------- #statistical fit aveFit<-lm(meanFunction ~ Diversity, data=germany) Anova(aveFit) summary(aveFit) ## ----------------------------------------------------------------------------- germanyThresh<-getFuncsMaxed(germany, german_vars, threshmin=0.05, threshmax=0.99, prepend=c("plot","Diversity"), maxN=7) ## ----------------------------------------------------------------------------- mfuncGermanyLinear08<-glm(funcMaxed ~ Diversity, data=subset(germanyThresh, germanyThresh$thresholds=="0.8"), family=quasipoisson(link="identity")) Anova(mfuncGermanyLinear08, test.statistic="F") summary(mfuncGermanyLinear08) ## ----------------------------------------------------------------------------- gcPlot<-subset(germanyThresh, germanyThresh$thresholds %in% qw(0.2, 0.4, 0.6, 0.8)) #note, using qw as %in% is a string comparison operator gcPlot$percent<-paste(100*gcPlot$thresholds, "%", sep="") qplot(Diversity, funcMaxed, data=gcPlot, facets=~percent) + stat_smooth(method="glm", method.args = list(family=quasipoisson(link="identity")), colour="red", lwd=1.2) + ylab(expression("Number of Functions" >= Threshold)) + xlab("Species Richness") + theme_bw(base_size=14) + geom_text(data=data.frame(percent = unique(gcPlot$percent), lab = paste(letters[1:4], ")", sep=""), Diversity=2, funcMaxed=6 ), mapping=aes(x=Diversity, y=funcMaxed, label=lab)) ## ----------------------------------------------------------------------------- germanyThresh$percent <- 100*germanyThresh$thresholds ggplot(data=germanyThresh, aes(x=Diversity, y=funcMaxed, group=percent)) + ylab(expression("Number of Functions" >= Threshold)) + xlab("Species Richness") + stat_smooth(method="glm", method.args = list(family=quasipoisson(link="identity")), lwd=0.8, fill=NA, aes(color=percent)) + theme_bw(base_size=14) + scale_color_gradient(name="Percent of \nMaximum", low="blue", high="red") ## ---- fig.height=8, fig.width=12---------------------------------------------- germanyLinearSlopes<-getCoefTab(funcMaxed ~ Diversity, data = germanyThresh, coefVar = "Diversity", family = quasipoisson(link="identity")) ###### # Plot the values of the diversity slope at # different levels of the threshold ###### germanSlopes <- ggplot(germanyLinearSlopes, aes(x=thresholds*100, y = estimate, ymax = estimate + 1.96 * std.error, ymin = estimate - 1.96 * std.error)) + geom_ribbon(fill="grey50") + geom_point() + ylab("Change in Number of Functions per Addition of 1 Species\n") + xlab("\nThreshold (%)") + geom_abline(intercept=0, slope=0, lwd=1, linetype=2) + theme_bw(base_size=14) germanSlopes ## ----------------------------------------------------------------------------- germanIDX <- getIndices(germanyLinearSlopes, germanyThresh, funcMaxed ~ Diversity) germanIDX ## ---- fig.height=8, fig.width=12---------------------------------------------- germanyLinearSlopes$estimate[which(germanyLinearSlopes$thresholds==germanIDX$Tmde)] germanyThresh$IDX <- 0 germanyThresh$IDX [which(germanyThresh$thresholds %in% c(germanIDX$Tmin, germanIDX$Tmax, germanIDX$Tmde))] <- 1 ggplot(data=germanyThresh, aes(x=Diversity, y=funcMaxed, group=percent)) + ylab(expression("Number of Functions" >= Threshold)) + xlab("Species Richness") + geom_smooth(method="glm", method.args = list(family=quasipoisson(link="identity")), fill=NA, aes(color=percent, lwd=IDX)) + theme_bw(base_size=14) + scale_color_gradient(name="Percent of \nMaximum", low="blue", high="red") + scale_size(range=c(0.3,5), guide="none") + annotate(geom="text", x=0, y=c(0.2,2,4.6), label=c("Tmax", "Tmde", "Tmin")) + annotate(geom="text", x=16.7, y=c(germanIDX$Mmin, germanIDX$Mmax, germanIDX$Mmde), label=c("Mmin", "Mmax", "Mmde")) ## ---- fig.height=8, fig.width=12---------------------------------------------- germanSlopes + annotate(geom="text", y=c(-0.01, -0.01, -0.01, germanIDX$Rmde.linear+0.02), x=c(germanIDX$Tmin*100, germanIDX$Tmde*100, germanIDX$Tmax*100, germanIDX$Tmde*100), label=c("Tmin", "Tmde", "Tmax", "Rmde"), color="black") ## ---- error=FALSE, warning=FALSE, message=FALSE------------------------------- #### # Now we will look at the entire BIODEPTH dataset # # Note the use of dplyr to generate the new data frame using all locations from biodepth. # If you're not using dplyr for your data aggregation, you should be! # It will save you a lot of time and headaches in the future. #### #Read in data to run sample analyses on the biodepth data data(all_biodepth) sub_biodepth<-subset(all_biodepth, all_biodepth$location %in% c("Sheffield", "Portugal", "Sweden")) sub_biodepth$location<-factor(sub_biodepth$location, levels=c("Portugal", "Sweden", "Sheffield")) allVars<-qw(biomassY3, root3, N.g.m2, light3, N.Soil, wood3, cotton3) varIdx<-which(names(sub_biodepth) %in% allVars) #re-normalize so that everything is on the same #sign-scale (e.g. the maximum level of a function is #the "best" function) sub_biodepth <- sub_biodepth %>% group_by(location) %>% mutate( light3 <- ifelse(sum(is.na(light3)==0), -1*light3+max(light3, na.rm=T), NA), N.Soil <- ifelse(sum(is.na(N.Soil)==0), -1*N.Soil+max(N.Soil, na.rm=T), NA) ) %>% ungroup() #get thresholds bdThreshes<-sub_biodepth %>% group_by(location) %>% nest() %>% mutate(fmaxed = map(data, getFuncsMaxed, vars=allVars, maxN=8)) %>% ungroup() %>% unnest(fmaxed) ####look at slopes #note, maxIT argument is due to some fits needing more iterations to converge bdLinearSlopes<-getCoefTab(funcMaxed ~ Diversity, data=bdThreshes, groupVar=c("location", "thresholds"), coefVar="Diversity", family=quasipoisson(link="identity"), control=list(maxit=800)) ## ---- error=FALSE, warning=FALSE, message=FALSE------------------------------- indexTable <- lapply(levels(bdLinearSlopes$location), function(x){ slopedata <- subset(bdLinearSlopes, bdLinearSlopes$location==x) threshdata <- subset(bdThreshes, bdThreshes$location==x) ret <- getIndices(slopedata, threshdata, funcMaxed ~ Diversity) ret<-cbind(location=x, ret) ret }) indexTable <- do.call(rbind, indexTable) indexTable <- rbind(data.frame(location="Germany", germanIDX), indexTable) indexTable ## ---- fig.height=8, fig.width=10---------------------------------------------- library(gridExtra) bdCurvesAll<-qplot(Diversity, funcMaxed, data=bdThreshes, group=thresholds, alpha=I(0)) + facet_wrap(~location)+#, scales="free") + scale_color_gradient(low="blue", high="red", name="Proportion of \nMaximum", guide=FALSE) + stat_smooth(method="glm", lwd=0.8, fill=NA, method.args = list(family=gaussian(link="identity"), control=list(maxit=200)), aes(color=thresholds)) + ylab("\nNumber of Functions ≥ Threshold\n\n") + xlab("Species Richness") + theme_bw(base_size=15) + geom_text(data=data.frame(location = levels(bdThreshes$location), lab = paste(letters[1:3], ")", sep=""), thresholds=c(NA, NA, NA)), x=1, y=7, mapping=aes(label=lab)) #Plot it! slopePlot<-ggplot(bdLinearSlopes, aes(x=thresholds*100, y=estimate, ymin = estimate - 2*std.error, ymax = estimate + 2*std.error)) + geom_ribbon(fill="grey50") + geom_point() + ylab("Change in Number of Functions \nper Addition of 1 Species\n") + xlab("Threshold (%)") + facet_wrap(~location)+#, scale="free") + geom_abline(intercept=0, slope=0, lwd=0.6, linetype=2) + theme_bw(base_size=15)+ geom_text(data=data.frame(location = levels(bdThreshes$location), lab = paste(letters[4:6], ")", sep=""), thresholds=c(NA, NA, NA), std.error=c(NA, NA, NA), estimate = c(NA, NA, NA) ), x=0.05, y=0.27, mapping=aes(label=lab)) ###Plot them in a single figure grid.arrange(bdCurvesAll, slopePlot)