## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(cache = 2, cache.lazy = FALSE, tidy = FALSE, warning = FALSE)

## ---- eval=FALSE---------------------------------------------------------------
# milr(y, x, bag, lambda, numLambda, lambdaCriterion, nfold, maxit)
# softmax(y, x, bag, alpha, ...)

## ---- eval=FALSE---------------------------------------------------------------
# fitted(object, type)
# predict(object, newdata, bag_newdata, type)

## ----DGP1----------------------------------------------------------------------
library(milr)
library(pipeR)
set.seed(99)
# set the size of the dataset
numOfBag <- 30
numOfInstsInBag <- 3
# set true coefficients: beta_0, beta_1, beta_2, beta_3
trueCoefs <- c(-2, -1, 2, 0.5)
trainData <- DGP(numOfBag, numOfInstsInBag, trueCoefs)
colnames(trainData$X) <- paste0("X", 1:ncol(trainData$X))
# bag-level responses derived from the instance labels
(instanceResponse <- as.numeric(with(trainData, tapply(Z, ID, any))))

## ----EST2----------------------------------------------------------------------
# fit the milr model
milrFit_EST <- milr(trainData$Z, trainData$X, trainData$ID, lambda = 1e-7)
# call the Wald test result
summary(milrFit_EST)
# call the regression coefficients
coef(milrFit_EST)

## ----EST-----------------------------------------------------------------------
fitted(milrFit_EST, type = "bag")
# fitted(milrFit_EST, type = "instance") # instance-level fitted labels
table(DATA = instanceResponse, FITTED = fitted(milrFit_EST, type = "bag"))
# predict for the testing data
testData <- DGP(numOfBag, numOfInstsInBag, trueCoefs)
colnames(testData$X) <- paste0("X", 1:ncol(testData$X))
(instanceResponseTest <- as.numeric(with(testData, tapply(Z, ID, any))))
pred_EST <- with(testData, predict(milrFit_EST, X, ID, type = "bag"))
# predict(milrFit_EST, testData$X, testData$ID,
#         type = "instance") # instance-level prediction
table(DATA = instanceResponseTest, PRED = pred_EST)

## ----VS, message=FALSE-----------------------------------------------------------
set.seed(99)
# set the new coefficient vector (large p)
trueCoefs_Lp <- c(-2, -2, -1, 1, 2, 0.5, rep(0, 45))
# generate the new training data with large p
trainData_Lp <- DGP(numOfBag, numOfInstsInBag, trueCoefs_Lp)
colnames(trainData_Lp$X) <- paste0("X", 1:ncol(trainData_Lp$X))
# variable selection over a user-defined tuning set
lambdaSet <- exp(seq(log(0.01), log(20), length = 20))
milrFit_VS <- with(trainData_Lp, milr(Z, X, ID, lambda = lambdaSet))
# extract the active covariates and their corresponding coefficients
coef(milrFit_VS) %>>% `[`(abs(.) > 0)

## ----AUTOVS, message=FALSE--------------------------------------------------------
# variable selection using auto-tuning
milrFit_auto_VS <- milr(trainData_Lp$Z, trainData_Lp$X, trainData_Lp$ID,
                        lambda = -1, numLambda = 5)
# the auto-selected lambda values
milrFit_auto_VS$lambda
# the values of BIC under each lambda value
milrFit_auto_VS$BIC
# extract the active covariates and their corresponding coefficients
coef(milrFit_auto_VS) %>>% `[`(abs(.) > 0)

## ----CV, message=FALSE-----------------------------------------------------------
# variable selection using auto-tuning with cross-validation
milrFit_auto_CV <- milr(trainData_Lp$Z, trainData_Lp$X, trainData_Lp$ID,
                        lambda = -1, numLambda = 5,
                        lambdaCriterion = "deviance", nfold = 3)
# the values of predictive deviance under each lambda value
milrFit_auto_CV$cv
# extract the active covariates and their corresponding coefficients
coef(milrFit_auto_CV) %>>% `[`(abs(.) > 0)
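## ----CVCHECK, eval=FALSE----------------------------------------------------------
# An optional sketch: compare the BIC-tuned and deviance-tuned fits on an
# independent test set generated by DGP(). The object names testData_Lp,
# bagResponse_Lp, predBIC, and predCV are illustrative, not from the vignette.
# testData_Lp <- DGP(numOfBag, numOfInstsInBag, trueCoefs_Lp)
# colnames(testData_Lp$X) <- paste0("X", 1:ncol(testData_Lp$X))
# bagResponse_Lp <- as.numeric(with(testData_Lp, tapply(Z, ID, any)))
# predBIC <- with(testData_Lp, predict(milrFit_auto_VS, X, ID, type = "bag"))
# predCV <- with(testData_Lp, predict(milrFit_auto_CV, X, ID, type = "bag"))
# table(DATA = bagResponse_Lp, PRED_BIC = predBIC)
# table(DATA = bagResponse_Lp, PRED_CV = predCV)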
## ----DLMUSK1---------------------------------------------------------------------
dataName <- "MIL-Data-2002-Musk-Corel-Trec9.tgz"
dataUrl <- "http://www.cs.columbia.edu/~andrews/mil/data/"

## ----READMUSK1-------------------------------------------------------------------
filePath <- file.path(getwd(), dataName)
# Download the MIL data sets from the url (not run)
# if (!file.exists(filePath))
#   download.file(paste0(dataUrl, dataName), filePath)
# Extract the MUSK1 data file (not run)
# if (!dir.exists("MilData"))
#   untar(filePath, files = "musk1norm.svm")
# Read and preprocess MUSK1
library(data.table)
MUSK1 <- fread("musk1norm.svm", header = FALSE) %>>%
  `[`(j = lapply(.SD, function(x) gsub("\\d+:(.*)", "\\1", x))) %>>%
  `[`(j = c("bag", "label") := tstrsplit(V1, ":")) %>>%
  `[`(j = V1 := NULL) %>>%
  `[`(j = lapply(.SD, as.numeric)) %>>%
  `[`(j = `:=`(bag = bag + 1, label = (label + 1) / 2)) %>>%
  setnames(paste0("V", 2:(ncol(.) - 1)), paste0("V", 1:(ncol(.) - 2))) %>>%
  `[`(j = paste0("V", 1:(ncol(.) - 2)) := lapply(.SD, scale),
      .SDcols = paste0("V", 1:(ncol(.) - 2)))
X <- paste0("V", 1:(ncol(MUSK1) - 2), collapse = "+") %>>%
  (paste("~", .)) %>>% as.formula %>>%
  model.matrix(MUSK1) %>>% `[`( , -1L)
Y <- as.numeric(with(MUSK1, tapply(label, bag, function(x) sum(x) > 0)))

## ----MIFIT, message=FALSE, results="hide"-------------------------------------------
# softmax with alpha = 0
softmaxFit_0 <- softmax(MUSK1$label, X, MUSK1$bag, alpha = 0,
                        control = list(maxit = 5000))
# softmax with alpha = 3
softmaxFit_3 <- softmax(MUSK1$label, X, MUSK1$bag, alpha = 3,
                        control = list(maxit = 5000))
# use a very small lambda so that milr does the estimation
# without evaluating the Hessian matrix
milrFit <- milr(MUSK1$label, X, MUSK1$bag, lambda = 1e-7, maxit = 5000)

## ----MILRVS, cache=TRUE, cache.lazy=FALSE, message=FALSE, warning=FALSE, tidy=FALSE----
# MILR-LASSO
milrSV <- milr(MUSK1$label, X, MUSK1$bag, lambda = -1, numLambda = 20,
               nfold = 3, lambdaCriterion = "deviance", maxit = 5000)
# show the detected active covariates
sv_ind <- names(which(coef(milrSV)[-1L] != 0)) %>>%
  (~ print(.)) %>>% match(colnames(X))
# use a very small lambda so that milr does the estimation
# without evaluating the Hessian matrix
milrREFit <- milr(MUSK1$label, X[ , sv_ind], MUSK1$bag,
                  lambda = 1e-7, maxit = 5000)
# confusion matrix of the refitted model
table(DATA = Y, FIT_MILR = fitted(milrREFit, type = "bag"))
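## ----MILRVSACC, eval=FALSE----------------------------------------------------------
# An optional sketch: summarise the confusion matrix of the refitted MILR model
# as accuracy, sensitivity, and specificity. The object name fitREFit is
# illustrative.
# fitREFit <- fitted(milrREFit, type = "bag")
# c(ACC = mean(fitREFit == Y),
#   SEN = mean(fitREFit[Y == 1] == 1),
#   SPE = mean(fitREFit[Y == 0] == 0))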
## ----MUSK1PRED2, message=FALSE------------------------------------------------------
set.seed(99)
predY <- matrix(0, length(Y), 4L) %>>%
  `colnames<-`(c("s0", "s3", "milr", "milr_sv"))
folds <- 3
foldBag <- rep(1:folds, floor(length(Y) / folds) + 1, length = length(Y)) %>>%
  sample(length(.))
foldIns <- rep(foldBag, table(MUSK1$bag))
for (i in 1:folds) {
  # prepare the training and testing sets
  ind <- which(foldIns == i)
  # train the models
  fit_s0 <- softmax(MUSK1[-ind, ]$label, X[-ind, ], MUSK1[-ind, ]$bag,
                    alpha = 0, control = list(maxit = 5000))
  fit_s3 <- softmax(MUSK1[-ind, ]$label, X[-ind, ], MUSK1[-ind, ]$bag,
                    alpha = 3, control = list(maxit = 5000))
  # milr, use a very small lambda so that milr does the estimation
  # without evaluating the Hessian matrix
  fit_milr <- milr(MUSK1[-ind, ]$label, X[-ind, ], MUSK1[-ind, ]$bag,
                   lambda = 1e-7, maxit = 5000)
  fit_milr_sv <- milr(MUSK1[-ind, ]$label, X[-ind, sv_ind], MUSK1[-ind, ]$bag,
                      lambda = 1e-7, maxit = 5000)
  # store the predicted labels
  ind2 <- which(foldBag == i)
  # the predict function returns the bag-level response by default
  predY[ind2, 1L] <- predict(fit_s0, X[ind, ], MUSK1[ind, ]$bag)
  predY[ind2, 2L] <- predict(fit_s3, X[ind, ], MUSK1[ind, ]$bag)
  predY[ind2, 3L] <- predict(fit_milr, X[ind, ], MUSK1[ind, ]$bag)
  predY[ind2, 4L] <- predict(fit_milr_sv, X[ind, sv_ind], MUSK1[ind, ]$bag)
}
table(DATA = Y, PRED_s0 = predY[ , 1L])
table(DATA = Y, PRED_s3 = predY[ , 2L])
table(DATA = Y, PRED_MILR = predY[ , 3L])
table(DATA = Y, PRED_MILR_SV = predY[ , 4L])
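## ----MUSK1ACC, eval=FALSE------------------------------------------------------------
# An optional sketch: compute the cross-validated bag-level accuracy of the four
# models from the predY matrix; each column is compared against the true bag
# labels in Y.
# apply(predY, 2L, function(p) mean(p == Y))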