## ---- echo = FALSE, eval = TRUE-----------------------------------------------
# First, load the package and the dataset. 
library(growthcurver)

# Load the sample growth curve data provided with the package 
# The first column is the time in hours, and there is one column 
# for each well in a 96-well plate.
d <- growthdata
knitr::kable(d[1:10, 1:8])

## ---- eval = FALSE------------------------------------------------------------
#  # Replace the next line with the location and name of your input data file.
#  file_name <- "the/path/to/my/data/myfilename.txt"
#  d <- read.table(file_name, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

## ---- eval = FALSE------------------------------------------------------------
#  # Convert the "time" column from hours to minutes
#  d$time <- d$time * 60
#  
#  # Convert the "time" column from minutes to seconds
#  d$time <- d$time * 60
#  
#  # Convert the "time" column from seconds to hours
#  d$time <- d$time / 60 / 60

## ---- eval = TRUE-------------------------------------------------------------
# First, load the package. 
library(growthcurver)

# Load the sample growth curve data provided in the Growthcurver package.
# The first column is the time in hours, and there is one column 
# for each well in a 96-well plate.
d <- growthdata

# Now, we'll use Growthcurver to summarize the growth curve data using the 
# simple background correction method (minimum value correction). This is the 
# default method, so we don't need to specify it in the command.
# This returns an object of type "gcfit" that holds information about
# the best parameters, the model fit, and additional metrics summarizing
# the growth curve.
gc_fit <- SummarizeGrowth(d$time, d$A1)

# It is easy to get the most useful metrics from a gcfit object, just type:
gc_fit

# And it is easy to plot the raw data and the best fit logistic curve
plot(gc_fit)

## ---- eval = FALSE------------------------------------------------------------
#  # The gcfit object returned from SummarizeGrowth also contains further metrics
#  # summarizing the growth curve data.
#  gc_fit$vals
#  
#  # look at the structure of the gc_fit object
#  str(gc_fit)

## ---- eval = TRUE-------------------------------------------------------------
# To see all the available metrics 
str(gc_fit$vals)

# To access a single metric (for example the growth rate r)
gc_fit$vals$r


## ---- eval = TRUE-------------------------------------------------------------
# First, load the package and the sample dataset. 
library(growthcurver)
d <- growthdata

## ---- eval = FALSE------------------------------------------------------------
#  # To analyze your data from Excel, you should read your data into the variable
#  # called d. To do so, replace the next line with the name and location of
#  # your input data file.
#  file_name <- "the/path/to/my/data/myfilename.txt"
#  d <- read.table(file_name, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#  
#  # Make sure that you have a column called "time" (and a column called "blank"
#  # if you are using "blanks" for your background correction). See the
#  # "Input Data" data section of the Vignette if you need help with this.

## ---- eval = TRUE-------------------------------------------------------------
# Now, we'll use Growthcurver to summarize the growth curve data for the entire
# plate using the default background correction method ("min").
gc_out <- SummarizeGrowthByPlate(d)

## ---- eval = FALSE------------------------------------------------------------
#  # If you would like to use the "blank" background correction, then call
#  # Growthcurver as follows
#  gc_out <- SummarizeGrowthByPlate(d, bg_correct = "blank")
#  
#  # If you would like to generate plots for all of the growth curves in your
#  # plate, then call Growthcurver as follows. You can change the name of
#  # the output file "gc_plots.pdf" to something that makes sense for you.
#  gc_out <- SummarizeGrowthByPlate(d, plot_fit = TRUE,
#                                   plot_file = "gc_plots.pdf")
#  
#  # The summary information for each well is listed as a row in the output
#  # data frame called gc_out.
#  
#  # We can look at the first few rows in the output using the head command.
#  head(gc_out)

## ---- eval = TRUE, echo = FALSE-----------------------------------------------
knitr::kable(gc_out[1:5, ])

## ---- eval = FALSE------------------------------------------------------------
#  # Or, you can save the entire data table to a tab-separated file that can be
#  # imported into Excel.
#  output_file_name <- "the/path/to/my/data/myfilename.txt"
#  write.table(gc_out, file = output_file_name,
#              quote = FALSE, sep = "\t", row.names = FALSE)

## ---- message = FALSE, fig.width = 7------------------------------------------
# As in the simple example, load the package and the data. 
library(growthcurver)
d <- growthdata

# Let's create an output data frame to store the results in. 
# We'll create it so that it is the right size (it's faster this way!), 
# but leave it empty.
num_analyses <- length(names(d)) - 1
d_gc <- data.frame(sample = character(num_analyses),
                   k = numeric(num_analyses),
                   n0  = numeric(num_analyses),
                   r = numeric(num_analyses),
                   t_mid = numeric(num_analyses),
                   t_gen = numeric(num_analyses),
                   auc_l = numeric(num_analyses),
                   auc_e = numeric(num_analyses),
                   sigma = numeric(num_analyses),
                   stringsAsFactors = FALSE)

# Truncate or trim the input data to observations occuring in the first 20 hours.
# Remember that the times in these sample data are reported in hours. To use  
# minutes (or to trim at a different time), change the next line of code. 
# For example, if you still would like to trim at 20 hours, but your time data 
# are reported in minutes use: trim_at_time <- 20 * 60
trim_at_time <- 20   

# Now, loop through all of the columns in the data frame. For each column,
# run Growthcurver, save the most useful metrics in the output data frame,
# and make a plot of all the growth curve data and their best fits.

# First, create a plot for each of the wells in the 96-well plate.
# Uncomment the next line to save the plots from your 96-well plate to a 
# pdf file in the working directory.
# pdf("growthcurver.pdf", height = 8.5, width = 11)
par(mfcol = c(8,12))
par(mar = c(0.25,0.25,0.25,0.25))
y_lim_max <- max(d[,setdiff(names(d), "time")]) - min(d[,setdiff(names(d), "time")])

n <- 1    # keeps track of the current row in the output data frame
for (col_name in names(d)) {
  
  # Don't process the column called "time". 
  # It contains time and not absorbance data.
  if (col_name != "time") {

    # Create a temporary data frame that contains just the time and current col
    d_loop <- d[, c("time", col_name)]
    
    # Do the background correction.
    # Background correction option 1: subtract the minimum value in a column
    #                                 from all measurements in that column
        min_value <- min(d_loop[, col_name])
    d_loop[, col_name] <- d_loop[, col_name] - min_value
    # Background correction option 2: subtract the mean value of blank wells
    #                                 over the course the experiment
    #                                 (Replace B2, D8, G11 with the column
    #                                  names of your media-only wells)
    #d$blank <- apply(d[, c("B2", "D8", "G11")], 1, mean)
    #d$A1 <- d$A1 - d$blank
    
    # Now, call Growthcurver to calculate the metrics using SummarizeGrowth
    gc_fit <- SummarizeGrowth(data_t = d_loop[, "time"], 
                              data_n = d_loop[, col_name],
                              t_trim = trim_at_time,
                              bg_correct = "none")
    
    # Now, add the metrics from this column to the next row (n) in the 
    # output data frame, and increment the row counter (n)
    d_gc$sample[n] <- col_name
    d_gc[n, 2:9] <- c(gc_fit$vals$k,
                      gc_fit$vals$n0,
                      gc_fit$vals$r,
                      gc_fit$vals$t_mid,
                      gc_fit$vals$t_gen,
                      gc_fit$vals$auc_l,
                      gc_fit$vals$auc_e,
                      gc_fit$vals$sigma)
    n <- n + 1
    
    # Finally, plot the raw data and the fitted curve
    # Here, I'll just print some of the data points to keep the file size smaller
    n_obs <- length(gc_fit$data$t)
    idx_to_plot <- 1:20 / 20 * n_obs
    plot(gc_fit$data$t[idx_to_plot], gc_fit$data$N[idx_to_plot], 
         pch = 20, 
         xlim = c(0, trim_at_time), 
         ylim = c(0, y_lim_max),
         cex = 0.6, xaxt = "n", yaxt = "n")
     text(x = trim_at_time / 4, y = y_lim_max, labels = col_name, pos = 1)
     lines(gc_fit$data$t, predict(gc_fit$model), col = "red")
  }
}
# Uncomment the next line to save the plots from your 96-well plate to a file
# dev.off()

## ---- eval = FALSE------------------------------------------------------------
#  # Look at the first few rows (samples) of data in the output data frame.
#  # (I'm only showing the first 4 rows of results, but you may want to see more.
#  #  You can either look at everything using the command "d_gc", or adjust the
#  #  number of rows displayed by changing the 4 to something else,
#  #  e.g., "d_gc[1:15,]").
#  d_gc[1:4, ]

## ---- eval = TRUE, message = FALSE, echo = FALSE------------------------------
library(dplyr)
d_gc[1:4, ] %>% 
    mutate(k = round(k, digits = 5),
         n0 = round(n0, digits = 5), 
         r = round(r, digits = 5),
         t_mid = round(t_mid, digits = 5),
         t_gen = round(t_gen, digits = 5),
         auc_l = round(auc_l, digits = 5),
         auc_e = round(auc_e, digits = 5), 
         sigma = round(sigma, digits = 5))
  

## ---- eval = FALSE, message = FALSE-------------------------------------------
#  # Check if Growthcurver provided any notes in a plate of growthcurves returned
#  # from SummarizeGrowthByPlate
#  gc_out %>% filter(note != "")
#  
#  # Check if Growthcurver provided any notes in a single growthcurve returned
#  # from SummarizeGrowth
#  gc_fit$vals$note

## ---- eval = TRUE, message = FALSE--------------------------------------------
# Load dplyr and the sample output data
library(dplyr)
gc_out <- as_data_frame(gc_out)

# Plot a histogram of the sigma values in order to check for outliers
hist(gc_out$sigma, main = "Histogram of sigma values", xlab = "sigma")


## ---- eval = FALSE, message = FALSE-------------------------------------------
#  # Show the top 5 samples with the largest sigma value
#  # (with the worst model fit to the growth curve data)
#  gc_out %>% top_n(5, sigma) %>% arrange(desc(sigma))

## ---- eval = TRUE, echo = FALSE, message = FALSE------------------------------
gc_out %>%  
  mutate(k = round(k, digits = 5),
         n0 = round(n0, digits = 5), 
         r = round(r, digits = 5),
         t_mid = round(t_mid, digits = 5),
         t_gen = round(t_gen, digits = 5),
         auc_l = round(auc_l, digits = 5),
         auc_e = round(auc_e, digits = 5), 
         sigma = round(sigma, digits = 5)) %>%
  top_n(5, sigma) %>% arrange(desc(sigma))

## ---- eval = TRUE, message = FALSE--------------------------------------------
# Load dplyr, ggplot2, and the sample data
library(dplyr)
library(ggplot2)
pca_gc_out <- as_data_frame(gc_out) 

# Prepare the gc_out data for the PCA
rownames(pca_gc_out) <- pca_gc_out$sample

# Do the PCA
pca.res <- prcomp(pca_gc_out %>% select(k:sigma), center=TRUE, scale=TRUE)

# Plot the results
as_data_frame(list(PC1=pca.res$x[,1],
                   PC2=pca.res$x[,2],
                   samples = pca_gc_out$sample)) %>% 
  ggplot(aes(x=PC1,y=PC2, label=samples)) + 
  geom_text(size = 3)