## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set( fig.width = 6, fig.height = 6, fig.align ='center' ) ## ----------------------------------------------------------------------------- library(cellWise) ## ----------------------------------------------------------------------------- data("data_clothes") X <- data_clothes dim(X) # create matrix S as in paper: sum(X) # total count is 4373 P <- X / sum(X) # relative frequencies that add up to 1 rvec <- rowSums(P) # vector of row totals cvec <- colSums(P) # vector of column totals R <- X / rowSums(X) # row profiles rowSums(R) # all 1, OK S <- diag(sqrt(rvec)) %*% (R - rep(1, nrow(X)) %*% t(cvec)) %*% diag(1/sqrt(cvec)) dimnames(S) <- dimnames(X) round(S, 3) d <- ncol(S) # We verify that the points of S are on a hyperplane by construction: (eigvals <- eigen(t(S) %*% S)$values) ## ----fig.height=8,fig.width=3------------------------------------------------- DDC.out <- DDC(S) ggp <- cellMap(R = DDC.out$stdResid, mTitle = "clothes data", rowtitle = "countries", columntitle = "brackets") # pdf("clothes_cellmap.pdf", width = 3, height = 6) plot(ggp) # dev.off() ## ----------------------------------------------------------------------------- countryInds <- which(rownames(S) %in% c("GB", "SK", "MT", "GR", "BG", "LV", "RO")) matplot(t(S[countryInds, ]), pch = 16, col = 1:7) lines(apply(S, 2, median), lwd = 3) # median profile # The profile of these countries deviate in a similar way: # they trade a lot of cheap clothes, and fewer expensive ones. ## ----fig.height=6,fig.width=6------------------------------------------------- svd.out <- svd(S) svd.out$d# S is indeed singular: (svd.out$d)^2 - eigen(t(S) %*% S)$values # ~0 diff(c(0,cumsum(svd.out$d^2)/sum(svd.out$d^2))) # This can be shown in a scree plot. # For plotting the rows: rowproj <- diag(1/sqrt(rvec)) %*% svd.out$u %*% diag(svd.out$d) # For plotting the columns = variables: colproj <- diag(1/sqrt(cvec)) %*% svd.out$v %*% diag(svd.out$d) # pdf(file="clothes_ClassCorrespA.pdf", width=7, height=6) plot(rowproj[, 1:2], col = "white", xlab="", ylab="", xlim = c(-1, 0.6), ylim = c(-0.6, 0.6)) title(main="Classical correspondence analysis of clothes data", line=1) text(rowproj[, 1:2], labels = rownames(S)) abline(v=0); abline(h=0) text(colproj[, 1:2], labels = colnames(S), col="red") facs = c(0.93,0.85,0.78,0.84,0.87) shape::Arrows(0, 0, facs*colproj[, 1], facs*colproj[, 2], col = "red", arr.type="simple", arr.width=0.1, arr.length = 0.1) # , arr.adj = 0) title(xlab="Dimension 1", line=2.3) title(ylab="Dimension 2", line=2.3) # dev.off() ## ----fig.height=6,fig.width=6------------------------------------------------- # We apply MacroPCA with center fixed at zero. # As in classical CA, we do not prescale S. MacroPCApar0 <- list(scale = FALSE, center = rep(0,d)) MacroPCA.out <- MacroPCA(S, k=0, MacroPCApars = MacroPCApar0) MacroPCA.out <- MacroPCA(S, k=2, MacroPCApars = MacroPCApar0) (eigvals <- MacroPCA.out$eigenvalues) diff(c(0,cumsum(eigvals)/sum(eigvals))) # Compute the principal coordinates for the biplot. # To make the plot match the orientation in Riani et al: V <- -MacroPCA.out$loadings Tscores <- -MacroPCA.out$scores sVals <- sqrt(nrow(S)*MacroPCA.out$eigenvalues) U <- Tscores %*% diag(1/sVals) rowproj <- diag(1/sqrt(rvec)) %*% U %*% diag(sVals) colproj <- diag(1/sqrt(cvec)) %*% V %*% diag(sVals) # pdf(file="clothes_MacroCA_biplot.pdf", width=7, height=6) plot(rowproj[, 1:2], col = "white", xlim=c(-0.95,0.65), ylim=c(-0.6,0.6), xlab="", ylab="") title(main="Cellwise robust correspondence analysis of clothes data", line=1) text(rowproj[,1:2], labels = rownames(S)) abline(h=0); abline(v=0) text(colproj[, 1:2], labels = colnames(S), col="red") facs = c(0.9,0.8,0.5,0.75,0.85) shape::Arrows(0, 0, facs*colproj[, 1], facs*colproj[, 2], col = "red", arr.type="simple", arr.width=0.1, arr.length = 0.1) title(xlab="Dimension 1", line=2.3) title(ylab="Dimension 2", line=2.3) # dev.off() # Matches Fig 4 of Riani quite well. ## ----------------------------------------------------------------------------- data("data_brands") X <- data_brands dim(X) sum(X) # total count is 11713 P <- X/sum(X) # relative frequencies that add up to 1 rvec <- rowSums(P) # vector of row totals hist(rvec) # Right tail: Chevrolet, Ford, Honda, Toyota # These brands are well known and sold a lot in the US. cvec <- colSums(P) # vector of column totals R <- X / rowSums(X) # row profiles S <- diag(sqrt(rvec)) %*% (R - rep(1, nrow(X)) %*% t(cvec)) %*% diag(1/sqrt(cvec)) dimnames(S) <- dimnames(X) d <- ncol(S) ## ----fig.height=8,fig.width=3------------------------------------------------- DDC.out <- DDC(S) ggp <- cellMap(R = DDC.out$stdResid, mTitle = "brands data", rowtitle = "brands", columntitle = "perceptions") # pdf("brands_cellmap.pdf", width = 3.5, height = 8) plot(ggp) # dev.off() # Volvo is most deviating (3 cells), followed by Hyundai # (2 cells) and Maserati (2 cells). ## ----fig.height=6,fig.width=6------------------------------------------------- svd.out <- svd(S) svd.out$d # S is singular: diff(c(0,cumsum(svd.out$d^2)/sum(svd.out$d^2))) # Can be plotted in a scree plot. # To match the plot in Riani at al: svd.out$v[, 2] = -svd.out$v[, 2] svd.out$u[, 2] = -svd.out$u[, 2] rowproj <- diag(1/sqrt(rvec)) %*% svd.out$u %*% diag(svd.out$d) colproj <- diag(1/sqrt(cvec)) %*% svd.out$v %*% diag(svd.out$d) # pdf(file="brands_ClassCorrespA.pdf", width=7, height=6) plot(rowproj[, 1:2], col = "white", xlim=c(-1.1,1.1), ylim=c(-0.8,1.5), xlab="", ylab="") title(main="Classical correspondence analysis of brands data", line=1) text(rowproj[,1:2], labels = rownames(S)) abline(v=0); abline(h=0) text(colproj[, 1:2], labels = colnames(S), col="red") facs = c(0.8,0.9,0.8,0.65,0.92,0.8,0.65) shape::Arrows(0, 0, facs*colproj[, 1], facs*colproj[, 2], col = "red", arr.type="simple", arr.width=0.1, arr.length = 0.1) title(xlab="Dimension 1", line=2.3) title(ylab="Dimension 2", line=2.3) # dev.off() ## ----fig.height=6,fig.width=6------------------------------------------------- MacroPCApar0 <- list(scale = FALSE, center = rep(0, d)) MacroPCA.out <- MacroPCA(S, k = 0, MacroPCApars = MacroPCApar0) MacroPCA.out <- MacroPCA(S, k = 3, MacroPCApars = MacroPCApar0) # Computations for the biplot. V <- MacroPCA.out$loadings scores <- MacroPCA.out$scores # To make the plot match the orientation in Riani et al: V[,2] <- -V[,2] scores[,2] <- -scores[,2] sVals <- sqrt(nrow(S)*MacroPCA.out$eigenvalues) U <- scores %*% diag(1/sVals) rowproj <- diag(1/sqrt(rvec)) %*% U %*% diag(sVals) colproj <- diag(1/sqrt(cvec)) %*% V %*% diag(sVals) # pdf(file="brands_MacroCA_biplot.pdf", width=7, height=6) plot(rowproj[, 1:2], col = "white", xlim=c(-1.1,1.1), ylim=c(-0.6,0.6), xlab="", ylab="") title(main="Cellwise robust correspondence analysis of brands data", line=1) text(rowproj[,1:2], labels = rownames(S)) abline(h=0); abline(v=0) text(colproj[, 1:2], labels = colnames(S), col="red") facs = c(0.75,0.76,0.9,0.88,0.65,0.74,0.52) shape::Arrows(0, 0, facs*colproj[, 1], facs*colproj[, 2], col = "red", arr.type="simple", arr.width=0.1, arr.length = 0.1) title(xlab="Dimension 1", line=2.3) title(ylab="Dimension 2", line=2.3) # dev.off() # Roughly matches Figure 7 of Riani et al (2022).