| 1 | ## File: StBr.r |
| 2 | ## Description: screens meaningful variables and performns |
| 3 | ## data transformation on clustering |
| 4 | |
| 5 | # rm(list = ls()) |
| 6 | |
| 7 | |
| 8 | ## Description: Steinley & Brusco (2006) data transform to cluster |
| 9 | StBrtransform <- function(X){ |
| 10 | apply(X, 2, function(x) 12 * var(x) / (max(x) - min(x))^2 ) |
| 11 | } |
| 12 | |
| 13 | |
| 14 | ## Description: Clustering index (Steinley & Brusco (2006)) |
| 15 | CI <- function(X, B = 1000) { # B : number of boostrap replications |
| 16 | |
| 17 | n <- nrow(X) |
| 18 | p <- ncol(X) |
| 19 | |
| 20 | #ci <- apply(X, 2, function(x) 12 * var(x) / (max(x) - min(x))^2 ) |
| 21 | ci <- StBrtransform(X) |
| 22 | |
| 23 | rc <- ci / min(ci) |
| 24 | minV <- which.min(rc) |
| 25 | |
| 26 | Xstar <- scale(X) |
| 27 | newRange <- apply(Xstar, 2, function(x) max(x) - min(x)) |
| 28 | |
| 29 | rmin <- newRange[minV] |
| 30 | |
| 31 | datat <- array(0.0, dim = dim(X)) |
| 32 | |
| 33 | # Reweighting X into datat |
| 34 | for(i in 1:p){ |
| 35 | v <- Xstar[, i] |
| 36 | temp <- rc[i] * (rmin / newRange[i])^2 |
| 37 | datat[, i] <- sqrt(temp) * v |
| 38 | } |
| 39 | |
| 40 | xboot <- matrix(rnorm(n * B), nrow = n) |
| 41 | #cinorm <- apply(xboot, 2, function(x) 12 * var(x) / (max(x) - min(x))^2 ) |
| 42 | cinorm <- StBrtransform(xboot) |
| 43 | ci95 <- median(cinorm) |
| 44 | |
| 45 | #ciStar <- apply(datat, 2, function(x) 12 * var(x) / (max(x) - min(x))^2 ) |
| 46 | ciStar <- StBrtransform(datat) |
| 47 | selectv <- which(ciStar > ci95) |
| 48 | |
| 49 | return(list(selectv = selectv, |
| 50 | tdata = datat) ) |
| 51 | } |
| 52 | |
| 53 | |
| 54 | |
| 55 | |
| 56 | #test <- matrix(rnorm(200), 40, 5) |
| 57 | #CI(test)$selectv |