## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(asmbPLS) ## load the R package
set.seed(123) ## set seed to generate the same results

## ---- cache=TRUE--------------------------------------------------------------
data(asmbPLS.example) ## load the example data set for asmbPLS

## ---- cache=TRUE--------------------------------------------------------------
## show the first 5 microbial taxa and the first 5 metabolites for the first 5 samples. 
asmbPLS.example$X.matrix[1:5, c(1:5, 201:205)] 
## show the outcome for the first 5 samples.
asmbPLS.example$Y.matrix[1:5,] 

## ---- cache=TRUE--------------------------------------------------------------
X.matrix = asmbPLS.example$X.matrix
X.matrix.new = asmbPLS.example$X.matrix.new
Y.matrix = asmbPLS.example$Y.matrix
PLS.comp = asmbPLS.example$PLS.comp
X.dim = asmbPLS.example$X.dim
quantile.comb.table.cv = asmbPLS.example$quantile.comb.table.cv
Y.indicator = asmbPLS.example$Y.indicator

## cv to find the best quantile combinations for model fitting
cv.results <- asmbPLS.cv(X.matrix = X.matrix, 
                         Y.matrix = Y.matrix, 
                         PLS.comp = PLS.comp, 
                         X.dim = X.dim, 
                         quantile.comb.table = quantile.comb.table.cv, 
                         Y.indicator = Y.indicator,
                         k = 5,
                         ncv = 5)
## obtain the best quantile combination for each PLS component
quantile.comb <- cv.results$quantile_table_CV[,1:length(X.dim)] 
## obtain the optimal number of PLS components
n.PLS <- cv.results$optimal_nPLS 

## ---- cache=TRUE--------------------------------------------------------------
asmbPLS.results <- asmbPLS.fit(X.matrix = X.matrix, 
                               Y.matrix = Y.matrix, 
                               PLS.comp = n.PLS, 
                               X.dim = X.dim, 
                               quantile.comb = quantile.comb)

## ---- cache=TRUE--------------------------------------------------------------
Y.pred <- asmbPLS.predict(asmbPLS.results, X.matrix.new, n.PLS)
head(Y.pred$Y_pred)

## ---- cache=TRUE--------------------------------------------------------------
## prediction for original data to check the data fit
Y.fit <- asmbPLS.predict(asmbPLS.results, X.matrix, n.PLS)
check.fit <- cbind(Y.matrix, Y.fit$Y_pred) 
head(check.fit)

## ---- cache=TRUE--------------------------------------------------------------
data(asmbPLSDA.example) ## load the example data set for asmbPLS-DA

## ---- cache=TRUE--------------------------------------------------------------
## show the first 5 features from block 1 and the first 5 features from block 2 for the first 5 samples.
asmbPLSDA.example$X.matrix[1:5, c(1:5, 201:205)]  
## show the binary outcome for the first 5 samples.
asmbPLSDA.example$Y.matrix.binary[1:5,] 
## show the multiclass outcome for the first 5 samples.
asmbPLSDA.example$Y.matrix.morethan2levels[1:5,] 

## ---- cache=TRUE--------------------------------------------------------------
X.matrix = asmbPLSDA.example$X.matrix
X.matrix.new = asmbPLSDA.example$X.matrix.new
Y.matrix.binary = asmbPLSDA.example$Y.matrix.binary
Y.matrix.multiclass = asmbPLSDA.example$Y.matrix.morethan2levels
X.dim = asmbPLSDA.example$X.dim
PLS.comp = asmbPLSDA.example$PLS.comp
quantile.comb.table.cv = asmbPLSDA.example$quantile.comb.table.cv

## ---- cache=TRUE--------------------------------------------------------------
## cv to find the best quantile combinations for model fitting (binary outcome)
cv.results.binary <- asmbPLSDA.cv(X.matrix = X.matrix, 
                                  Y.matrix = Y.matrix.binary, 
                                  PLS.comp = PLS.comp, 
                                  X.dim = X.dim, 
                                  quantile.comb.table = quantile.comb.table.cv, 
                                  outcome.type = "binary",
                                  k = 5,
                                  ncv = 5)
quantile.comb.binary <- cv.results.binary$quantile_table_CV[,1:length(X.dim)]
n.PLS.binary <- cv.results.binary$optimal_nPLS

## ---- cache=TRUE--------------------------------------------------------------
## cv to find the best quantile combinations for model fitting 
## (categorical outcome with more than 2 levels)
cv.results.multiclass <- asmbPLSDA.cv(X.matrix = X.matrix, 
                                      Y.matrix = Y.matrix.multiclass, 
                                      PLS.comp = PLS.comp, 
                                      X.dim = X.dim, 
                                      quantile.comb.table = quantile.comb.table.cv, 
                                      outcome.type = "multiclass",
                                      k = 5,
                                      ncv = 5)
quantile.comb.multiclass <- cv.results.multiclass$quantile_table_CV[,1:length(X.dim)]
n.PLS.multiclass <- cv.results.multiclass$optimal_nPLS

## ---- cache=TRUE--------------------------------------------------------------
## asmbPLSDA fit using the selected quantile combination (binary outcome)
asmbPLSDA.fit.binary <- asmbPLSDA.fit(X.matrix = X.matrix, 
                                      Y.matrix = Y.matrix.binary, 
                                      PLS.comp = n.PLS.binary, 
                                      X.dim = X.dim, 
                                      quantile.comb = quantile.comb.binary,
                                      outcome.type = "binary")

## ---- cache=TRUE--------------------------------------------------------------
## asmbPLSDA fit (categorical outcome with more than 2 levels)
asmbPLSDA.fit.multiclass <- asmbPLSDA.fit(X.matrix = X.matrix, 
                                          Y.matrix = Y.matrix.multiclass, 
                                          PLS.comp = n.PLS.multiclass, 
                                          X.dim = X.dim, 
                                          quantile.comb = quantile.comb.multiclass,
                                          outcome.type = "multiclass")

## ---- cache=TRUE--------------------------------------------------------------
## classification for the new data based on the asmbPLS-DA model with the binary outcome.
Y.pred.binary <- asmbPLSDA.predict(asmbPLSDA.fit.binary, 
                                   X.matrix.new, 
                                   PLS.comp = n.PLS.binary)
## classification for the new data based on the asmbPLS-DA model with the multiclass outcome.
Y.pred.multiclass <- asmbPLSDA.predict(asmbPLSDA.fit.multiclass,
                                       X.matrix.new, 
                                       PLS.comp = n.PLS.multiclass)

## ---- cache=TRUE--------------------------------------------------------------
cv.results.cutoff <- cv.results.binary
quantile.comb.cutoff <- cv.results.cutoff$quantile_table_CV
## Cross validation using Euclidean distance of X super score
cv.results.EDX <- asmbPLSDA.cv(X.matrix = X.matrix, 
                               Y.matrix = Y.matrix.binary,
                               PLS.comp = PLS.comp, 
                               X.dim = X.dim, 
                               quantile.comb.table = quantile.comb.table.cv, 
                               outcome.type = "binary", 
                               method = "Euclidean_distance_X",
                               k = 5,
                               ncv = 5)
quantile.comb.EDX <- cv.results.EDX$quantile_table_CV

## Cross validation using Mahalanobis distance of X super score
cv.results.MDX <- asmbPLSDA.cv(X.matrix = X.matrix, 
                                  Y.matrix = Y.matrix.binary,
                                  PLS.comp = PLS.comp, 
                                  X.dim = X.dim, 
                                  quantile.comb.table = quantile.comb.table.cv, 
                                  outcome.type = "binary", 
                                  method = "Mahalanobis_distance_X",
                                  k = 5,
                                  ncv = 5)
quantile.comb.MDX <- cv.results.MDX$quantile_table_CV

## ---- cache=TRUE--------------------------------------------------------------
#### vote list ####
cv.results.list = list(fixed_cutoff = quantile.comb.cutoff,
                       Euclidean_distance_X = quantile.comb.EDX,
                       Mahalanobis_distance_X = quantile.comb.MDX)

## ---- cache=TRUE--------------------------------------------------------------
vote.fit <- asmbPLSDA.vote.fit(X.matrix = X.matrix, 
                               Y.matrix = Y.matrix.binary, 
                               X.dim = X.dim, 
                               nPLS = c(cv.results.cutoff$optimal_nPLS, 
                               cv.results.EDX$optimal_nPLS, 
                               cv.results.MDX$optimal_nPLS),
                               cv.results.list = cv.results.list, 
                               outcome.type = "binary",
                               method = "weighted")

## ---- cache=TRUE--------------------------------------------------------------
## classification
vote.predict <- asmbPLSDA.vote.predict(vote.fit, X.matrix.new)
head(vote.predict)

## ---- cache=TRUE--------------------------------------------------------------
## custom block.name and group.name
plotCor(asmbPLSDA.fit.binary, 
        ncomp = 1, 
        block.name = c("mRNA", "protein"), 
        group.name = c("control", "case"))

## ---- cache=TRUE--------------------------------------------------------------
## custom block.name and group.name
plotPLS(asmbPLSDA.fit.binary, 
        comp.X = 1, 
        comp.Y = 2, 
        group.name = c("control", "case"))

## -----------------------------------------------------------------------------
plotRelevance(asmbPLSDA.fit.binary, 
              n.top = 5,
              block.name = c("mRNA", "protein"))