---
title: "morpheus"
output:
  pdf_document:
    number_sections: true
    toc_depth: 1
---

```{r setup, results="hide", include=FALSE}
knitr::opts_chunk$set(echo = TRUE, include = TRUE,
  cache = TRUE, comment="", cache.lazy = FALSE,
  out.width = "100%", fig.align = "center")
```

0) Explain that we try to learn the classification parameters in a non-EM way, using algebraic manipulations.
1) Model.
2) Algorithm (as in the article).
3) Experiments: demonstrate package usage.

# Experiments

```{r, results="markup", include=TRUE, echo=TRUE}
library(Rmixmod) #clustering; provides the posterior classification probability matrix
library(ClusVis)
library(FactoMineR) #PCA

plotPCA <- function(prob)
{
  par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  partition <- apply(prob, 1, which.max) #hard assignment: most probable cluster
  n <- nrow(prob)
  K <- ncol(prob)
  palette <- rainbow(K, s=.5)
  cols <- palette[partition]
  # PCA on the probability rows; the K indicator vectors diag(K) are appended
  # as supplementary individuals, so each pure-cluster vertex is projected too
  tmp <- PCA(rbind(prob, diag(K)), ind.sup=(n+1):(n+K), scale.unit=FALSE, graph=FALSE)
  scores <- tmp$ind$coord[,1:3] #first three principal coordinates, one row per sample
  ctrs <- tmp$ind.sup$coord #projected indicator vectors, one row per cluster
  # three pairwise views (1-2, 1-3, 2-3) fill three of the four panels
  for (i in 1:2)
  {
    for (j in (i+1):3)
    {
      absc <- scores[,i]
      ords <- scores[,j]
      plot(absc, ords, col=cols, pch="o",
        xlab=paste0("Dim ", i, " (", round(tmp$eig[i,2],2), "%)"),
        ylab=paste0("Dim ", j, " (", round(tmp$eig[j,2],2), "%)"))
      # per-cluster means of the projected samples, labeled in a contrasting color...
      ctrsavg <- t(apply(as.matrix(palette), 1,
        function(cl) c(mean(absc[cols==cl]), mean(ords[cols==cl]))))
      text(ctrsavg[,1], ctrsavg[,2], as.character(1:K), col=colors()[215])
      # ...and projections of the pure-cluster indicator vectors, in black
      text(ctrs[,i], ctrs[,j], as.character(1:K), col=1)
      title(paste0("PCA ", i, "-", j, " / K=", K))
    }
  }
  # TODO: fill the fourth panel with something informative
  plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
}

plotClvz <- function(xem, alt=FALSE)
{
  par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  if (alt) {
    # direct entry point: ClusVis works on the log posterior probabilities
    resvisu <- clusvis(log(xem@bestResult@proba), xem@bestResult@parameters@proportions)
  } else {
    # convenience wrapper for Rmixmod results
    resvisu <- clusvisMixmod(xem)
  }
  plotDensityClusVisu(resvisu, positionlegend=NULL)
  plotDensityClusVisu(resvisu, add.obs=TRUE, positionlegend=NULL)
  # TODO: fill the two remaining panels with something informative
  plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
}

grlplot <- function(x, K, alt=FALSE) #x: data, K: number of clusters
{
  xem <- mixmodCluster(x, K, strategy=mixmodStrategy(nbTryInInit=500, nbTry=25))
  plotPCA(xem@results[[1]]@proba)
  plotClvz(xem, alt)
}
```
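
For reference, `plotPCA` expects a posterior probability matrix: one row per observation, one column per cluster, each row summing to 1; the `diag(K)` block appended before the PCA consists of the pure-cluster indicator vectors, projected as supplementary individuals. A minimal sketch on a made-up matrix (not evaluated; the numbers are only for illustration):

```{r, eval=FALSE}
# Made-up posterior matrix for K=3 clusters, only to show the expected input.
prob <- matrix(c(0.80, 0.10, 0.10,
                 0.20, 0.70, 0.10,
                 0.05, 0.05, 0.90,
                 0.60, 0.30, 0.10,
                 0.10, 0.80, 0.10,
                 0.15, 0.05, 0.80), ncol=3, byrow=TRUE)
stopifnot(all(prob >= 0), all(abs(rowSums(prob) - 1) < 1e-8))
plotPCA(prob)
```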

## Iris data

```{r, results="markup", include=TRUE, echo=TRUE}
data(iris)
x <- iris[,-5] #remove the class labels
for (i in 3:5)
{
  print(paste("Results with", i, "clusters"))
  grlplot(x, i)
}
```
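
To inspect one fitted model outside of `grlplot` (same strategy settings as above), the standard Rmixmod accessors can be used; a short sketch, not evaluated here:

```{r, eval=FALSE}
xem <- mixmodCluster(x, 3, strategy=mixmodStrategy(nbTryInInit=500, nbTry=25))
summary(xem)               #retained model, mixing proportions, criterion value
head(xem@bestResult@proba) #posterior probabilities fed to the plots above
```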

## Finance dataset (from Rmixmod package)

This dataset has two categorical attributes (the year and the financial status) and four continuous ones.

Warning: some classification probabilities are exactly equal to zero, so ClusVis cannot be used as-is (`clusvis` takes the log-probabilities, and `log(0)` is `-Inf`).

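A possible workaround, sketched under the assumption that nudging exact zeros away from 0 does not meaningfully distort the visualization: clamp the probabilities and renormalize before taking logs. `clampProba` is a hypothetical helper, not part of ClusVis or Rmixmod.

```{r, eval=FALSE}
# Hypothetical helper: raise exact zeros to eps and renormalize each row,
# so that log(prob) stays finite.
clampProba <- function(prob, eps=1e-12)
{
  prob <- pmax(prob, eps)
  prob / rowSums(prob)
}
resvisu <- clusvis(log(clampProba(xem@bestResult@proba)),
                   xem@bestResult@parameters@proportions)
```

The original experiment is kept below but not evaluated.
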
```{r, results="markup", include=TRUE, echo=TRUE, eval=FALSE}
data(finance)
x <- finance[,-2] #drop the financial status column
for (i in 3:5)
{
  print(paste("Results with", i, "clusters"))
  grlplot(x, i, TRUE)
}
```

## "Cathy dataset" (12 clusters)

Warning: here too some classification probabilities are exactly equal to zero, so the chunk below is kept but not evaluated (same `log(0)` issue as above).

```{r, results="hide", include=TRUE, echo=TRUE, eval=FALSE}
cathy12 <- as.matrix(read.table("data/probapostCatdbBlocAtrazine-K12.txt"))
resvisu <- clusvis(log(cathy12), prop = colMeans(cathy12))
par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
plotDensityClusVisu(resvisu, positionlegend = NULL)
plotDensityClusVisu(resvisu, add.obs = TRUE, positionlegend = NULL)
plotPCA(cathy12)
```

## Pima Indian diabetes

[Source.](https://gist.github.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f)

Not evaluated here: the chunk relies on the local file `data/pimaData.rda`.

```{r, results="markup", include=TRUE, echo=TRUE, eval=FALSE}
load("data/pimaData.rda")
for (i in 3:5)
{
  print(paste("Results with", i, "clusters"))
  grlplot(x, i)
}
```

## Breast cancer

[Source.](http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+%28diagnostic%29)

Not evaluated here: the chunk relies on the local file `data/wdbc.rda`.

```{r, results="markup", include=TRUE, echo=TRUE, eval=FALSE}
load("data/wdbc.rda")
for (i in 3:5)
{
  print(paste("Results with", i, "clusters"))
  grlplot(x, i)
}
```

## House-votes

Not evaluated here: the chunk relies on the local file `data/house-votes.rda`.

```{r, results="markup", include=TRUE, echo=TRUE, eval=FALSE}
load("data/house-votes.rda")
for (i in 3:5)
{
  print(paste("Results with", i, "clusters"))
  grlplot(x, i)
}
```