## ----setup, include=FALSE-----------------------------------------------------
library(canprot)
library(CHNOSZ)
oldopt <- options(width = 80)
## ----HTML, include=FALSE------------------------------------------------------
Zc <- "ZC"
## ----echo = FALSE-------------------------------------------------------------
knitr::read_chunk("../demo/thermophiles.R")
## ----thermophiles_demo_body, out.width = "100%", fig.align = "center", fig.width = 8, fig.height = 6, echo = FALSE, message = FALSE, dpi = 150----
# Change this to TRUE to make the image for the README
# (with only the Nitrososphaeria plot)
do.pdf <- FALSE
if(do.pdf) pdf("thermophiles.pdf", width = 4.2, height = 4.2)
if(!do.pdf) par(mfrow = c(1, 2))
par(mar = c(4, 4.5, 2, 1))
# Define metrics to calculate
metrics <- c("Zc", "S0g")
xlab <- cplab[[metrics[1]]]
ylab <- cplab[[metrics[2]]]
# Define common y-axis limit
ylim <- c(0.3000, 0.3045)
# Define colors
col2.8 <- adjustcolor(2, alpha.f = 0.8)
col4.8 <- adjustcolor(4, alpha.f = 0.8)
col2.5 <- adjustcolor(2, alpha.f = 0.5)
if(!do.pdf) {
## Plot 1: methanogen genomes
# Read amino acid composition
aafile <- system.file("extdata/aa/methanogen_aa.csv", package = "canprot")
aa <- read.csv(aafile)
# Set fill color for thermophiles
ithermo <- aa$ref > 50
bg <- ifelse(ithermo, col2.8, col4.8)
# Set point symbol for Class I and II
pch <- ifelse(aa$abbrv == "Class I", 21, 22)
# Calculate and plot metrics
metvals <- calc_metrics(aa, metrics)
plot(metvals, pch = pch, bg = bg, xlab = xlab, ylab = ylab, ylim = ylim)
# Add convex hull around thermophiles
add_hull(metvals[ithermo, ], col = col2.5, border = NA)
# Add legend and title
text(-0.225, 0.303, "Thermophiles", col = 2)
text(-0.18, 0.3003, "Mesophiles", col = 4)
legend("bottomleft", c("Class I", "Class II"), pch = c(21, 22), cex = 0.9)
title("Methanogen genomes", font.main = 1)
}
## Plot 2: Nitrososphaeria MAGs
# Read genome data
datfile <- system.file("extdata/aa/nitrososphaeria_MAGs.csv", package = "canprot")
dat <- read.csv(datfile)
# Read amino acid compositions
aafile <- system.file("extdata/aa/nitrososphaeria_aa.csv", package = "canprot")
aa <- read.csv(aafile)
# Match accessions
idat <- match(aa$organism, dat$Accession)
dat <- dat[idat, ]
# Filter by family
ifam <- !dat$Family %in% c("Nitrosopumilaceae", "Nitrososphaeraceae")
dat <- dat[ifam, ]
aa <- aa[ifam, ]
# Assign point symbol and color
pch <- sapply(dat$Respiration.type, switch, Anaerobic = 21, Aerobic = 22, Microaerobic = 23)
bg <- sapply(dat$Habitat.type, switch, Thermal = col2.8, Nonthermal = col4.8)
# Calculate and plot metrics
metvals <- calc_metrics(aa, metrics)
# Make plot
plot(metvals, pch = pch, bg = bg, xlab = xlab, ylab = ylab, ylim = ylim)
# Add convex hull around MAGs from thermal habitats
ithermal <- dat$Habitat.type == "Thermal"
add_hull(metvals[ithermal, ], col = col2.5, border = NA)
# Add legend and title
text(-0.215, 0.303, "Thermal habitats", col = 2)
text(-0.175, 0.3013, "Nonthermal habitats", col = 4)
legend("bottomright", c("Anaerobic", "Aerobic", "Microaerobic"), pch = c(21, 22, 23), cex = 0.9)
title("Nitrososphaeria MAGs", font.main = 1)
if(do.pdf) dev.off()
## ----echo = FALSE-------------------------------------------------------------
knitr::read_chunk("../demo/locations.R")
## ----locations_demo_setup, echo = FALSE---------------------------------------
# Change to TRUE to make a PDF image
do.pdf <- FALSE
if(do.pdf) pdf("locations.pdf", width = 8, height = 4.5)
## ----locations_demo_body, out.width = "100%", fig.align = "center", fig.width = 8, fig.height = 4.5, dpi = 150----
# Read SI table
file <- system.file("extdata/protein/TAW+17_Table_S6_Validated.csv", package = "canprot")
dat <- read.csv(file)
# Keep only proteins with validated location
dat <- dat[dat$Reliability == "Validated", ]
# Keep only proteins with one annotated location
dat <- dat[rowSums(dat[, 4:32]) == 1, ]
# Get the amino acid compositions
aa <- human_aa(dat$Uniprot)
# Put the location into the amino acid data frame
aa$location <- dat$IF.main.protein.location
# Use top locations (and their colors) from Fig. 2B of Thul et al., 2017
locations <- c("Cytosol","Mitochondria","Nucleoplasm","Nucleus","Vesicles","Plasma membrane")
col <- c("#194964", "#2e6786", "#8a2729", "#b2333d", "#e0ce1d", "#e4d71c")
# Keep the proteins in these locations
aa <- aa[aa$location %in% locations, ]
## Keep only proteins with length between 100 and 2000
#aa <- aa[plength(aa) >= 100 & plength(aa) <= 2000, ]
# Get amino acid composition for proteins in each location
# (Loop over groups by piping location names into lapply)
aalist <- lapply(locations, function(location) aa[aa$location == location, ] )
# Setup plot
par(mfrow = c(1, 2))
titles <- c(Zc = "Carbon oxidation state", pI = "Isoelectric point")
# Calculate Zc and pI
for(metric in c("Zc", "pI")) {
datlist <- lapply(aalist, metric)
bp <- boxplot(datlist, ylab = cplab[[metric]], col = col, show.names = FALSE)
add_cld(datlist, bp)
# Make rotated labels
x <- (1:6) + 0.1
y <- par()$usr[3] - 1.5 * strheight("A")
text(x, y, locations, srt = 25, adj = 1, xpd = NA)
axis(1, labels = FALSE)
title(titles[metric], font.main = 1)
}
## ----echo = FALSE-------------------------------------------------------------
knitr::read_chunk("../demo/redoxins.R")
## ----redoxins_demo_body, out.width = "75%", fig.align = "center", fig.width = 6, fig.height = 5, echo = FALSE, message = FALSE, dpi = 100----
# Data file with protein IDs, sequence start/stop positions, and midpoint potentials
data_file <- system.file("extdata/fasta/redoxin.csv", package = "canprot")
dat <- read.csv(data_file)
# Drop PDI (a human protein) 20240304
dat <- dat[dat$protein != "PDI", ]
# Read header lines
fasta_file <- system.file("extdata/fasta/redoxin.fasta", package = "canprot")
headers <- read_fasta(fasta_file, type = "header")
# Locate the sequences in the FASTA file
iseqs <- sapply(dat$ID, grep, x = headers)
# Loop over proteins
aalist <- lapply(1:nrow(dat), function(i) {
# Read the amino acid composition of this protein
read_fasta(fasta_file, iseq = iseqs[i], start = dat$start[i], stop = dat$stop[i])
})
aa <- do.call(rbind, aalist)
# Make ferredoxin-thioredoxin reductase dimer (variable chain/catalytic chain)
iFTR <- grep("FTR", dat$protein)
aa[iFTR[1], 6:24] <- colSums(aa[iFTR, 6:24])
aa <- aa[-iFTR[2], ]
dat$protein[iFTR[1]] <- paste(dat$protein[iFTR], collapse = ":")
dat <- dat[-iFTR[2], ]
# Calculate Zc
Zc_values <- Zc(aa)
# Point symbols for E. coli and spinach
pch <- rep(19, length(Zc_values))
pch[dat$organism=="spinach"] <- 0
# Start plot
par(las = 1)
plot(dat$E0, Zc_values, pch = pch, xlim = c(-450, -100), ylim = c(-0.28, -0.04),
xlab = expression(list(italic(E)*degree*"'", mV)),
ylab = expression(italic(Z)[C]))
# Add dashed lines
lines(dat$E0[dat$organism == "ecoli"], Zc_values[dat$organism == "ecoli"], lty = 2)
lines(dat$E0[dat$organism == "spinach"], Zc_values[dat$organism == "spinach"], lty = 2)
# Add labels
pos <- rep(1, length(Zc_values))
pos[dat$organism == "ecoli"] <- 4
pos[dat$organism == "spinach"] <- 2
dx <- dy <- numeric(length(Zc_values))
dx[dat$protein == "DsbA"] <- -10
dy[dat$protein == "DsbA"] <- -0.012
dx[dat$protein == "DsbC"] <- -20
dy[dat$protein == "DsbC"] <- -0.012
text(dat$E0 + dx, Zc_values + dy, dat$protein, pos = pos)
# Add legend
legend("bottomright", pch = c(19, 0), legend = c(expression(italic("E. coli")), "spinach"))
## ----reset, include=FALSE-----------------------------------------------------
options(oldopt)