## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- echo = TRUE, eval = FALSE-----------------------------------------------
#  install.packages("missoNet")

## ---- echo = TRUE, eval = FALSE-----------------------------------------------
#  if(!require("devtools")) {
#    install.packages("devtools")
#  }
#  devtools::install_github("yixiao-zeng/missoNet")

## -----------------------------------------------------------------------------
library(missoNet)

## -----------------------------------------------------------------------------
## Specify a random seed for reproducibility.
## The overall missing rate in the response matrix is around 10%.
## The missing mechanism can also be "MCAR" or "MNAR".
sim.dat <- generateData(n = 150, p = 15, q = 12, rho = 0.1, missing.type = "MAR", with.seed = 1512)

tr <- 1:120  # training set indices
tst <- 121:150  # test set indices

## ---- eval = FALSE------------------------------------------------------------
#  Z <- sim.dat$Z  # corrupted response matrix
#  visdat::vis_miss(as.data.frame(Z))

## ---- include = FALSE, eval = FALSE-------------------------------------------
#  png(file = "vismis.png", width = 1900, height = 1300, res = 260)
#  suppressWarnings(visdat::vis_miss(as.data.frame(sim.dat$Z)))
#  dev.off()

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
knitr::include_graphics(system.file("extdata", "vismis.png", package = "missoNet"))

## -----------------------------------------------------------------------------
## Training set
X.tr <- sim.dat$X[tr, ]  # predictor matrix
Z.tr <- sim.dat$Z[tr, ]  # corrupted response matrix

## Using the training set to fit the model.
## 'lambda.Beta' and 'lambda.Theta' are arbitrarily set to 0.1.
## 'verbose' = 0 suppresses printing of messages.
fit <- missoNet(X = X.tr, Y = Z.tr, lambda.Beta = 0.1, lambda.Theta = 0.1, verbose = 0)

## -----------------------------------------------------------------------------
lambda.Beta.vec <- 10^(seq(from = 0, to = -1, length.out = 10))  # 10 values on the log scale, from 1 to 0.1
lambda.Theta.vec <- rep(0.1, 10)  # for each value of 'lambda.Beta', 'lambda.Theta' remains constant at 0.1
fit_list <- missoNet(X = X.tr, Y = Z.tr, lambda.Beta = lambda.Beta.vec, lambda.Theta = lambda.Theta.vec, verbose = 0)

## -----------------------------------------------------------------------------
## If 'permute' = FALSE, the samples will be split into k-folds in their original orders,
## i.e. the first (n/'kfold') samples will be assigned to the first fold an so on.

cvfit <- cv.missoNet(X = X.tr, Y = Z.tr, kfold = 5,
                     lambda.Beta = lambda.Beta.vec, lambda.Theta = lambda.Theta.vec,
                     permute = TRUE, with.seed = 433, verbose = 0)

## ---- eval = FALSE------------------------------------------------------------
#  ## 'fit.1se = TRUE' tells the program to make additional estimations of the
#  ## parameters at the largest value of 'lambda.Beta' / 'lambda.Theta' that gives
#  ## the most regularized model such that the cross-validated error is within one
#  ## standard error of the minimum.
#  
#  cl <- parallel::makeCluster(min(parallel::detectCores()-1, 2))
#  cvfit <- cv.missoNet(X = X.tr, Y = Z.tr, kfold = 5,
#                       fit.1se = TRUE, parallel = TRUE, cl = cl,
#                       permute = TRUE, with.seed = 433, verbose = 1)
#  parallel::stopCluster(cl)

## ---- eval = FALSE------------------------------------------------------------
#  ## The values of mean cross-validated errors along with upper and lower standard deviation bounds
#  ## can be accessed via 'cvfit$cvm', 'cvfit$cvup' and 'cvfit$cvlo', respectively.
#  
#  plot(cvfit)

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
knitr::include_graphics(system.file("extdata", "cvfitHeat.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  plot(cvfit, type = "cv.scatter", plt.surf = FALSE)

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
knitr::include_graphics(system.file("extdata", "cvfitScat.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  ## Define a plotting function
#  plot_heatmap <- function(est, col, legend = FALSE, lgd_name, title) {
#    return(ComplexHeatmap::Heatmap(est, cluster_rows = FALSE, cluster_columns = FALSE,
#                                   col = col, show_heatmap_legend = legend, name = lgd_name,
#                                   column_names_gp = grid::gpar(fontsize = 8),
#                                   row_names_gp = grid::gpar(fontsize = 8), row_names_side = "left",
#                                   border = TRUE, column_title = title))
#  }
#  
#  ## Color space
#  col <- circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
#  
#  ## Beta*
#  Beta_star <- sim.dat$Beta
#  Beta_star_ht <- plot_heatmap(Beta_star, col, title = expression(paste(bold(Beta), "*")))
#  
#  ## Beta_hat at "lambda.min"
#  Beta_hat_min <- cvfit$est.min$Beta
#  Beta_hat_min_ht <- plot_heatmap(Beta_hat_min, col, title = expression(paste(hat(bold(Beta)), " at `lambda.min`")))
#  
#  ## Beta_hat at "lambda.1se.Beta"
#  Beta_hat_1se <- cvfit$est.1se.B$Beta
#  Beta_hat_1se_ht <- plot_heatmap(Beta_hat_1se, col, legend = TRUE, lgd_name = "values",
#                                  title = expression(paste(hat(bold(Beta)), " at `lambda.Beta.1se`")))
#  
#  ## Theta*
#  Theta_star <- sim.dat$Theta
#  Theta_star_ht <- plot_heatmap(Theta_star, col, title = expression(paste(bold(Theta), "*")))
#  
#  ## Theta_hat at "lambda.min"
#  Theta_hat_min <- cvfit$est.min$Theta
#  Theta_hat_min_ht <- plot_heatmap(Theta_hat_min, col, title = expression(paste(hat(bold(Theta)), " at `lambda.min`")))
#  
#  ## Theta_hat at "lambda.1se.Beta"
#  Theta_hat_1se <- cvfit$est.1se.B$Theta
#  Theta_hat_1se_ht <- plot_heatmap(Theta_hat_1se, col, legend = TRUE, lgd_name = "values",
#                                   title = expression(paste(hat(bold(Theta)), " at `lambda.Beta.1se`")))
#  
#  ## Plot
#  Beta_star_ht + Beta_hat_min_ht + Beta_hat_1se_ht
#  Theta_star_ht + Theta_hat_min_ht + Theta_hat_1se_ht

## ---- echo = FALSE, fig.align = 'left', out.width = "100%"--------------------
knitr::include_graphics(system.file("extdata", "cvfitEst1.png", package = "missoNet"))

## ---- echo = FALSE, fig.align = 'left', out.width = "100%"--------------------
knitr::include_graphics(system.file("extdata", "cvfitEst2.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  ## Predictions on the test set: newy = mu + newx %*% Beta.
#  
#  ## 's' can also be "lambda.1se.Beta" or "lambda.1se.Theta"
#  ## (why 's' but not 'lambda'? Because we follow the naming rules of glmnet).
#  
#  newy <- predict(cvfit, newx = sim.dat$X[tst, ], s = "lambda.min")
#  
#  cat("dim(newy):", dim(newy))

## ---- echo = FALSE------------------------------------------------------------
cat("dim(newy): 30 12")

## ---- eval = FALSE------------------------------------------------------------
#  ## Load data
#  library(bgsmtr, quietly = TRUE)
#  data(bgsmtr_example_data)
#  
#  ## Transpose data matrix to make rows correspond to
#  ## samples and columns correspond to variables.
#  SNP <- t(bgsmtr_example_data$SNP_data)  # predictor matrix
#  BM <- t(bgsmtr_example_data$BrainMeasures)  # complete response matrix
#  
#  ## Unsupervised filtering of the top 50% variables.
#  SNP_var <- apply(SNP, 2, var)
#  SNP_subset <- SNP_var > quantile(SNP_var, 0.5)
#  SNP <- SNP[ ,SNP_subset]
#  
#  set.seed(123)  # a random seed for reproducibility
#  tr <- sample(1:632, 550, replace = FALSE)  # training set indices
#  tst <- c(1:632)[-tr]  # test set indices
#  
#  cat("dim(SNP):", dim(SNP[tr, ]), "
#  dim(BM):", dim(BM[tr, ]))

## ---- echo = FALSE------------------------------------------------------------
cat("dim(SNP): 550 243
dim(BM): 550 15")

## ---- eval = FALSE------------------------------------------------------------
#  ## Generate the indicator matrix of missingness.
#  M <- matrix(1, nrow(BM), ncol(BM))
#  NA.pos <- do.call("cbind", lapply(1:ncol(BM), function(x) {
#    rbinom(nrow(BM), size = 1, prob = 0.05) == 1
#  }))
#  ## All missing values should be coded as 'NA's or 'NaN's.
#  M[NA.pos] <- NA
#  BM.mis = BM * M
#  
#  cat(BM.mis[1:6, 5:8])

## ---- echo = FALSE------------------------------------------------------------
cat("    Left_InfLatVent.adj Left_LatVent.adj Left_EntCtx.adj Left_Fusiform.adj
V1           -473.42061       -4595.2672      0.60678817                NA
V2            100.12989       -9163.7594      0.29868885       0.196951758
V3            373.36131      -10659.8137      0.18835349       0.105863582
V4            876.03547               NA     -1.19832801      -0.233082858
V5           -887.40929       -6789.0606      0.09203347       0.098447465
V6            354.12157               NA     -0.32757347       0.310694508")

## ---- eval = FALSE------------------------------------------------------------
#  ## Model I (the coarse-grid fit)
#  
#  ## To be more in line with real-world usage, the model is trained without specified
#  ## missing probabilities for the response variables ('rho' = NULL) and lambda sequences
#  ## for the lasso penalties ('lambda.Beta' = NULL, 'lambda.Theta' = NULL), in which case
#  ## the program will automatically compute and use reasonable values.
#  
#  cl <- parallel::makeCluster(min(parallel::detectCores()-1, 2))
#  cvfit.BM <- cv.missoNet(X = SNP[tr, ], Y = BM.mis[tr, ], kfold = 5,
#                          rho = NULL, lambda.Beta = NULL, lambda.Theta = NULL,
#                          lamBeta.min.ratio = 1e-4, lamTheta.min.ratio = 1e-4,
#                          n.lamBeta = 20, n.lamTheta = 20,
#                          lamBeta.scale.factor = 3, lamTheta.scale.factor = 3,
#                          standardize = TRUE, standardize.response = TRUE,
#                          permute = TRUE, with.seed = 433,
#                          parallel = TRUE, cl = cl, verbose = 1)
#  parallel::stopCluster(cl)

## ---- eval = FALSE------------------------------------------------------------
#  plot(cvfit.BM)

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
knitr::include_graphics(system.file("extdata", "cvfitBM.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  ## Model II (the fine-grid fit)
#  
#  ## For brevity, arguments taking the defaults are not specified explicitly.
#  cl <- parallel::makeCluster(min(parallel::detectCores()-1, 2))
#  cvfit2.BM <- cv.missoNet(X = SNP[tr, ], Y = BM.mis[tr, ], kfold = 5,
#                           lamBeta.min.ratio = 0.01, lamTheta.min.ratio = 0.01,
#                           n.lamBeta = 40, n.lamTheta = 40,
#                           lamBeta.scale.factor = 3, lamTheta.scale.factor = 0.3,
#                           fit.relax = TRUE, permute = TRUE, with.seed = 433,
#                           parallel = TRUE, cl = cl, verbose = 1)
#  parallel::stopCluster(cl)
#  
#  ## Certainly, users can specify the lambda values (linear on the log scale) by themselves
#  ## for the cross-validation search rather than use the control arguments. The following
#  ## commands should return almost the same results as the above ones (subtle differences
#  ## come from rounding the float numbers).
#  
#  # cl <- parallel::makeCluster(min(parallel::detectCores()-1, 2))
#  # cvfit2.BM <- cv.missoNet(X = SNP[tr, ], Y = BM.mis[tr, ], kfold = 5,
#  #                          lambda.Beta = 10^(seq(from = log10(2.66), to = log10(2.66*0.01), length.out = 40)),
#  #                          lambda.Theta = 10^(seq(from = log10(2.99), to = log10(2.99*0.01), length.out = 40)),
#  #                          fit.relax = TRUE, permute = TRUE, with.seed = 433,
#  #                          parallel = TRUE, cl = cl, verbose = 0)
#  # parallel::stopCluster(cl)

## ---- eval = FALSE------------------------------------------------------------
#  cat("\"lambda.min\":
#    - lambda.Beta:", cvfit2.BM$est.min$lambda.Beta, "
#    - lambda.Theta:", cvfit2.BM$est.min$lambda.Theta)
#  
#  plot(cvfit2.BM, detailed.axes = FALSE)

## ---- echo = FALSE------------------------------------------------------------
cat("\"lambda.min\":
  - lambda.Beta: 0.198283
  - lambda.Theta: 0.573265")

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
knitr::include_graphics(system.file("extdata", "cvfit2BM.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  ## Theta_hat at "lambda.min"
#  Theta_hat <- cvfit2.BM$est.min$Theta
#  rownames(Theta_hat) <- colnames(Theta_hat) <- colnames(BM)
#  col <- circlize::colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
#  ht1 <- plot_heatmap(Theta_hat, col = col, legend = TRUE, lgd_name = "values",
#                      title = expression(paste(hat(bold(Theta)), " at `lambda.min` (unclustered)")))
#  plot(ht1)
#  
#  ## Beta_hat at "lambda.min"
#  Beta_hat <- cvfit2.BM$est.min$Beta
#  colnames(Beta_hat) <- colnames(BM)
#  col <- circlize::colorRamp2(c(-0.05, 0, 0.05), c("blue", "white", "red"))
#  ht2 <- plot_heatmap(Beta_hat, col = col, legend = TRUE, lgd_name = "values",
#                      title = expression(paste(hat(bold(Beta)), " at `lambda.min` (unclustered)")))
#  plot(ht2)
#  
#  ## Relaxed network
#  relax.net <- cvfit2.BM$est.min$relax.net
#  rownames(relax.net) <- colnames(relax.net) <- colnames(BM)
#  col <- circlize::colorRamp2(c(-4, 0, 4), c("blue", "white", "red"))
#  ht3 <- plot_heatmap(relax.net, col = col, legend = TRUE, lgd_name = "values",
#                      title = "Relaxed fit of network (unclustered)")
#  plot(ht3)
#  
#  ## Plot the partial correlation matrix of the 15 neuroimaging measures.
#  ## Must supply a dataset without missing values, so we use the original dataset here.
#  pcor.mat <- ppcor::pcor(BM)  # not BM.mis
#  corrplot::corrplot(pcor.mat$estimate, p.mat = pcor.mat$p.value,
#                     sig.level = 0.01, insig = "blank",
#                     tl.cex = 0.5, tl.col = "black", mar = c(.5, .1, 2, .1),
#                     title = "Partial correlation matrix (unclustered)")

## ---- echo = FALSE, fig.align = 'left', out.width = "100%"--------------------
# knitr::include_graphics(paste("vignetteFigs", "BMcomb.png", sep = .Platform$file.sep))
knitr::include_graphics(system.file("extdata", "BMcomb.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  colnames(Beta_hat) <- colnames(BM)  # brain measure names
#  rownames(Beta_hat) <- colnames(SNP)  # SNP names
#  Gene_groups <- bgsmtr_example_data$SNP_groups[SNP_subset]  # gene names
#  
#  ComplexHeatmap::pheatmap(Beta_hat,
#                           annotation_row = data.frame("Gene" = Gene_groups, row.names = rownames(Beta_hat)),
#                           color = circlize::colorRamp2(c(-0.05,0,0.05), c("blue","white","red")), show_rownames = FALSE,
#                           border_color = NA, border = TRUE, name = "values", cluster_rows = TRUE, cluster_cols = FALSE,
#                           column_title = expression(paste(hat(bold(Beta)), " at `lambda.min` (clustered)")))

## ---- eval = FALSE, include = FALSE-------------------------------------------
#  set.seed(34)  # seed for legend colors
#  ComplexHeatmap::pheatmap(cvfit2.BM$est.min$Beta,
#                           annotation_row = data.frame("Gene" = cvfit2.BM$Gene, row.names = rownames(cvfit2.BM$est.min$Beta)),
#                           color = circlize::colorRamp2(c(-0.05,0,0.05), c("blue","white","red")), show_rownames = FALSE,
#                           border_color = NA, border = TRUE, name = "values", cluster_rows = TRUE, cluster_cols = FALSE,
#                           column_title = expression(paste(hat(bold(Beta)), " at `lambda.min` (clustered)")))

## ---- echo = FALSE, fig.align = 'left', out.width = "80%"---------------------
# knitr::include_graphics(paste("vignetteFigs", "BMcomb.png", sep = .Platform$file.sep))
knitr::include_graphics(system.file("extdata", "cvfit2BMclustB.png", package = "missoNet"))

## ---- eval = FALSE------------------------------------------------------------
#  ## Model III (the complete-data fit)
#  
#  ## All arguments are kept unchanged compared to model II, except BM.mis -> BM.
#  cl <- parallel::makeCluster(min(parallel::detectCores()-1, 2))
#  cvfit3.BM <- cv.missoNet(X = SNP[tr, ], Y = BM[tr, ], kfold = 5,
#                           lamBeta.min.ratio = 0.01, lamTheta.min.ratio = 0.01,
#                           n.lamBeta = 40, n.lamTheta = 40,
#                           lamBeta.scale.factor = 3, lamTheta.scale.factor = 0.3,
#                           fit.relax = TRUE, permute = TRUE, with.seed = 433,
#                           parallel = TRUE, cl = cl, verbose = 1)
#  parallel::stopCluster(cl)

## ---- eval = FALSE------------------------------------------------------------
#  ## Predictions on the test set.
#  newy.model2 <- predict(cvfit2.BM, newx = SNP[tst, ], s = "lambda.min")
#  newy.model3 <- predict(cvfit3.BM, newx = SNP[tst, ], s = "lambda.min")
#  
#  cat("MAE on the test set:
#    - Model II (corrupted):", mean(abs(BM[tst, ] - newy.model2)), "
#    - Model III (complete):", mean(abs(BM[tst, ] - newy.model3)))

## ---- echo = FALSE------------------------------------------------------------
cat("MAE on the test set:
  - Model II (corrupted): 2543.588
  - Model III (complete): 2543.591")