## -----------------------------------------------------------------------------
library(MASS)
library(lime)

data(biopsy)

# First we'll clean up the data a bit
biopsy$ID <- NULL
biopsy <- na.omit(biopsy)
names(biopsy) <- c('clump thickness', 'uniformity of cell size',
                   'uniformity of cell shape', 'marginal adhesion',
                   'single epithelial cell size', 'bare nuclei',
                   'bland chromatin', 'normal nucleoli', 'mitoses', 'class')

# Now we'll fit a linear discriminant model, holding out 4 cases for testing
set.seed(4)
test_set <- sample(seq_len(nrow(biopsy)), 4)
prediction <- biopsy$class
biopsy$class <- NULL
model <- lda(biopsy[-test_set, ], prediction[-test_set])

## -----------------------------------------------------------------------------
# Predicted classes and posterior probabilities for the held-out cases
predict(model, biopsy[test_set, ])

## -----------------------------------------------------------------------------
# Train an explainer on the training data, binning continuous features
explainer <- lime(biopsy[-test_set, ], model,
                  bin_continuous = TRUE, quantile_bins = FALSE)
explanation <- explain(biopsy[test_set, ], explainer,
                       n_labels = 1, n_features = 4)
# Only showing part of the output for more compact printing
explanation[, 2:9]

## ---- fig.asp=1.25, out.width='70%', fig.width=6, fig.align='center'----------
plot_features(explanation, ncol = 1)

## -----------------------------------------------------------------------------
library(lime)
library(xgboost)   # the classifier
library(text2vec)  # used to build the bag-of-words (BoW) matrix

# Load the data
data(train_sentences)
data(test_sentences)

# The data is stored in a two-column data.frame: one column for the sentences,
# one for the labels.
str(train_sentences)

# The list of possible classes for a sentence.
# We are only interested in the class "OWNX".
print(unique(train_sentences$class.text))

# Tokenize data. The hashing vectorizer maps train, test, and any new text
# into the same feature space without needing a fitted vocabulary.
get_matrix <- function(text) {
  it <- itoken(text, progressbar = FALSE)
  create_dtm(it, vectorizer = hash_vectorizer())
}

# BoW matrix generation
dtm_train <- get_matrix(train_sentences$text)
dtm_test <- get_matrix(test_sentences$text)

# Create a boosting model for binary classification (-> logistic loss).
# The other parameters are quite standard.
param <- list(max_depth = 7,
              eta = 0.1,
              objective = "binary:logistic",
              eval_metric = "error",
              nthread = 1)
xgb_model <- xgb.train(
  param,
  xgb.DMatrix(dtm_train, label = train_sentences$class.text == "OWNX"),
  nrounds = 50
)

## -----------------------------------------------------------------------------
# We use a (standard) probability threshold of 0.5
predictions <- predict(xgb_model, dtm_test) > 0.5
test_labels <- test_sentences$class.text == "OWNX"

# Accuracy on the test set
print(mean(predictions == test_labels))

## ---- fig.asp=1, out.width='70%', fig.width=6, fig.align='center'-------------
# We select the first 5 test sentences with the label OWNX
sentence_to_explain <- head(test_sentences[test_labels, ]$text, 5)
explainer <- lime(sentence_to_explain, model = xgb_model,
                  preprocess = get_matrix)
explanation <- explain(sentence_to_explain, explainer,
                       n_labels = 1, n_features = 2)

# Most of the words chosen by LIME are related to the authors (we, our)
# or to the structure of the paper (Section, in)
explanation[, 2:9]

# A more graphical view of the same information
plot_features(explanation)

## ---- eval=FALSE--------------------------------------------------------------
# plot_text_explanations(explanation)

## ---- eval=FALSE--------------------------------------------------------------
# # Launching the application is done in one command
# interactive_text_explanations(explainer)

## ---- echo=FALSE--------------------------------------------------------------
sessioninfo::session_info()
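
## ---- eval=FALSE--------------------------------------------------------------
# # A minimal sketch, not part of the original vignette: because the text
# # explainer only needs the `preprocess` function to map raw text into the
# # same hashed BoW space the model was trained on, it can also explain
# # sentences that never appeared in test_sentences. The sentence below is
# # made up purely for illustration.
# new_sentence <- "In this section we describe our proposed method"
# explain(new_sentence, explainer, n_labels = 1, n_features = 2)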
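
## ---- eval=FALSE--------------------------------------------------------------
# # A minimal sketch, not part of the original vignette: explain() also
# # records the fit (R^2) of each local ridge model in the `model_r2` column,
# # which gives a quick sanity check on how faithfully an explanation tracks
# # the black-box model around each case.
# unique(explanation[, c('case', 'label', 'model_r2')])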