Introduction

This file contains some examples of the functions related to the Wrap routine. More specifically the functions `wrap` and `estLocScale`.

``````library("cellWise")
``````

Simple example: Stars data

``````# load the data

X <- robustbase::starsCYG; plot(X)
``````

``````cutoff <- sqrt(qchisq(0.975, ncol(X)))
X.mean <- colMeans(X)
X.cov  <- cov(X)
cor(X)
``````
``````##               log.Te  log.light
## log.Te     1.0000000 -0.2104133
## log.light -0.2104133  1.0000000
``````
``````locScaleX <- estLocScale(X)
Xw <- wrap(X, locScaleX\$loc, locScaleX\$scale)\$Xw
Xw.mean <- colMeans(Xw)
Xw.cov <- cov(Xw)
cor(Xw)
``````
``````##           [,1]      [,2]
## [1,] 1.0000000 0.5732006
## [2,] 0.5732006 1.0000000
``````
``````# classical Mahalanobis distances
#

md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
ylab = "Mahalanobis distance",
xlab = "Index",
main = "Mahalanobis distances",
xlim = c(0, nrow(X)), ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
``````

``````# wrapping-based robust distances
#

rd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
plot(1:nrow(X), rd, pch = 16, cex = 0.9,
ylab = "robust distance",
xlab = "Index",
main = "Wrapping-based robust distances",
xlim = c(0, nrow(X)), ylim = c(0, max(rd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
``````

``````# classical and wrapped tolerance ellipse
#

ell1 <- ellipse::ellipse(X.cov, centre = X.mean, t = cutoff, npoints = 120)
ell2 <- ellipse::ellipse(Xw.cov, centre = Xw.mean, t = cutoff, npoints = 120)
xmin <- min(min(X[, 1]), min(ell1[, 1]), min(ell2[, 1]))
xmax <- max(max(X[, 1]), max(ell1[, 1]), max(ell2[, 1]))
ymin <- min(min(X[, 2]), min(ell1[, 2]), min(ell2[, 2]))
ymax <- max(max(X[, 2]), max(ell1[, 2]), max(ell2[, 2]))
myxlim <- c(xmin, xmax); myylim <- c(ymin, ymax)
plot(X, xlab = "X1", ylab = "X2", main = "Stars: classical and wrapping ellipse",
pch = 16, col = "black", xlim = myxlim, ylim = myylim)
points(ell1, type = "l", lwd = 1.5, col = "red")
points(ell2, type = "l", lwd = 1.5, col = "blue")
``````

Bushfire dataset: a 5-dimensional example

``````?robustbase::bushfire
X <- as.matrix(robustbase::bushfire); pairs(X)
``````

``````cutoff <- sqrt(qchisq(0.975, ncol(X))); cutoff
``````
``````## [1] 3.582248
``````
``````X.mean <- colMeans(X); X.cov <- cov(X)
round(cor(X), 2)
``````
``````##       V1    V2    V3    V4    V5
## V1  1.00  0.80 -0.59 -0.49 -0.49
## V2  0.80  1.00 -0.53 -0.53 -0.52
## V3 -0.59 -0.53  1.00  0.97  0.98
## V4 -0.49 -0.53  0.97  1.00  1.00
## V5 -0.49 -0.52  0.98  1.00  1.00
``````
``````estX <- cellWise::estLocScale(X)
Xw <- cellWise::wrap(X, estX\$loc, estX\$scale)\$Xw
Xw.mean <- colMeans(Xw); Xw.cov <- cov(Xw)
round(cor(Xw), 2)
``````
``````##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,]  1.00  0.79 -0.62 -0.28 -0.45
## [2,]  0.79  1.00 -0.35  0.14 -0.06
## [3,] -0.62 -0.35  1.00  0.67  0.87
## [4,] -0.28  0.14  0.67  1.00  0.94
## [5,] -0.45 -0.06  0.87  0.94  1.00
``````
``````wd <- sqrt(mahalanobis(X, center = Xw.mean, cov = Xw.cov))
weights <- (wd < cutoff) + 0
# Reweighting functions:
wmean <- function(X, w) { # weighted mean
wmean1 <- function(y, w) { sum(y * w) / sum(w) }
apply(X, 2, FUN = wmean1, w = w)
}
wcov <- function(X, w) { # weighted cov
Xc <- sweep(X, 2, wmean(X, w), "-")
Xc <- sweep(Xc, 1, sqrt(w), "*")
t(Xc) %*% Xc / sum(w)
}
X.wmean <- wmean(X, weights)
X.wcov <- wcov(X, weights)
round(cov2cor(X.wcov), 2)
``````
``````##       V1    V2    V3    V4    V5
## V1  1.00  0.89 -0.71 -0.64 -0.65
## V2  0.89  1.00 -0.33 -0.25 -0.27
## V3 -0.71 -0.33  1.00  0.97  0.98
## V4 -0.64 -0.25  0.97  1.00  1.00
## V5 -0.65 -0.27  0.98  1.00  1.00
``````
``````# Classical Mahalanobis distances
#
md <- sqrt(mahalanobis(X, center = X.mean, cov = X.cov))
plot(1:nrow(X), md, pch = 16, cex = 0.9,
ylab = "Mahalanobis distance", xlab = "Index",
main = "Mahalanobis distances", xlim = c(0, nrow(X)),
ylim = c(0, max(md, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
``````

``````# No outliers stand out here.

# Wrapping-based robust distances
#
rwd <- sqrt(mahalanobis(X, center = X.wmean, cov = X.wcov))
plot(1:nrow(X),rwd,pch = 16, cex = 0.9,
ylab = "robust distance", xlab = "Index",
main = "Wrapping-based robust distances", xlim = c(0, nrow(X)),
ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(h = cutoff, lwd = 1.5, col = "red")
``````

``````# Many outliers are visible, the same as in Maronna-Yohai 1995.

# Distance-distance plot:
#
plot(md, rwd, pch = 16, cex = 0.9, ylab = "robust distance",
xlab = "classical distance", main = "distance-distance plot",
xlim = c(0, max(md, cutoff) + 0.2),
ylim = c(0, max(rwd, cutoff) + 0.2), col = "black")
abline(v = cutoff, lwd = 1.5, col = "red")
abline(h = cutoff, lwd = 1.5, col = "red")
abline(0, 1, lwd = 1.5, lty = 2)
``````

``````# only the robust distances detect the outliers

# Classical and wrapped tolerance ellipses:
#
coordgrid <- expand.grid(1:dim(X)[2], 1:dim(X)[2])
coordgrid <- coordgrid[-(1 + (1:dim(X)[2] - 1) * (dim(X)[2] + 1)), ]
i = 1
pairs(X, panel = function(x, y) {
points(x, y)
inds <- as.numeric(coordgrid[i, ])
ell1 <- ellipse::ellipse(X.cov[inds, inds], centre = X.mean[inds],
t = cutoff, npoints = 120)
ell2 <- ellipse::ellipse(X.wcov[inds, inds], centre = X.wmean[inds],
t = cutoff, npoints = 120)
points(ell1, type = "l", lwd = 1.5, col = "red")
points(ell2, type = "l", lwd = 1.5, col = "blue")
i <<- i + 1
})
``````

Dog walker video example

``````data(dog_walker)
# keep a copy of the data for later:
X <- dog_walker

# Combine all pixels of an image in a row:
dims <- dim(X); dims # 54 144 180   3
``````
``````## [1]  54 144 180   3
``````
``````dim(X) <- c(dims[1], prod(dims[2:4]))
dim(X) #  54 77760
``````
``````## [1]    54 77760
``````
``````# Estimate location and scale using estLocScale:
locScaleX <- estLocScale(X, precScale = 1e-12)
``````
``````## Warning in estLocScale(X, precScale = 1e-12): 81  out of  77760  variables have an estimated scale <=
## "precScale" =  1e-12 .
``````
``````# Preprocess the data to deal with zeroscales:
zeropix <- which(locScaleX\$scale < 1e-12)
nzero <- length(zeropix)
filler <- 2 * ((1:dims[1]) - (dims[1] + 1) / 2) / (dims[1] - 1)
filler <- 0.9 / 255 * matrix(rep(filler, nzero), ncol = nzero, byrow = F)
centers <- apply(X[, zeropix], 2, FUN = median)
centers <- pmax(0, pmin(1, centers));
filler <- sweep(filler, 2, centers, "+")
X[, zeropix] <- filler
locScaleX\$scale[zeropix] <- estLocScale(X[, zeropix])\$scale

# Wrap the data
Xw <- wrap(X, locScaleX\$loc, locScaleX\$scale)\$Xw

# center the wrapped data
aveXw <- colMeans(Xw)
XwC <- sweep(Xw, 2, aveXw)

# Compute singular value decomposition of centered wrapped data
prcXwz <- svd::propack.svd(XwC, neig = 3)
``````
``````## [1] 77760     3
``````
``````Xwscores <- XwC %*% rloadings; dim(Xwscores)
``````
``````## [1] 54  3
``````
``````## Project original data on robust loadings:
XC <- sweep(X, 2, aveXw)

# Compute the fitted frames and residuals:
X.rfitted <- sweep(Xznew.rfitted, 2, aveXw, "+")
X.rresidual <- X - X.rfitted

dim(rim1) <- dims[2:4]; grid::grid.newpage()
grid::grid.raster(rim1, interpolate = FALSE)
``````

``````# Compute location and scale of the residuals
res.med <- median(X.rresidual)
res <- (X.rresidual - res.med) / res.mad

# Make plots
frameinds <- c(10, 20, 30, 40)
cutoff <- 200
for (i in 1:4) {
index <- frameinds[i]
restemp <- res[index,];
dim(restemp) <- c(prod(dims[2:3]), dims[4]);
res2 <- restemp[, 1]^2 + restemp[, 2]^2 + restemp[, 3]^2