vignettes/report.Rmd

   1 ---
   2 title: morpheus...........
   3
   4 output:
   5   pdf_document:
   6     number_sections: true
   7     toc_depth: 1
   8 ---
   9
  10 ```{r setup,  results="hide", include=FALSE}
  11 knitr::opts_chunk$set(echo = TRUE, include = TRUE,
  12   cache = TRUE, comment="", cache.lazy = FALSE,
  13   out.width = "100%", fig.align = "center")
  14 ```
  15
   16 0) Explain that we try to learn the classification parameters without EM, using algebraic manipulations.
  17 1) Model.
  18 2) Algorithm (as in article)
  19 3) Experiments: show package usage
  20
  21 # Expériences
  22
  23 ```{r, results="show", include=TRUE, echo=TRUE}
  24 library(Rmixmod) #to get clustering probability matrix
  25 library(ClusVis)
  26 library(FactoMineR) #for PCA
  27
  28 plotPCA <- function(prob)
  29 {
  30   par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  31   partition <- apply(prob, 1, which.max)
  32   n <- nrow(prob)
  33   K <- ncol(prob)
  34   palette <- rainbow(K, s=.5)
  35   cols <- palette[partition]
  36   tmp <- PCA(rbind(prob, diag(K)), ind.sup=(n+1):(n+K), scale.unit=F, graph=F)
  37   scores <- tmp$ind$coord[,1:3] #samples coords, by rows
  38   ctrs <- tmp$ind.sup$coord #projections of indicator vectors (by cols)
  39   for (i in 1:2)
  40   {
  41     for (j in (i+1):3)
  42     {
  43       absc <- scores[,i]
  44       ords <- scores[,j]
  45       xrange <- range(absc)
  46       yrange <- range(ords)
  47       plot(absc, ords, col=c(cols,rep(colors()[215],K),rep(1,K)),
  48         pch=c(rep("o",n),rep(as.character(1:K),2)),
  49         xlim=xrange, ylim=yrange,
  50         xlab=paste0("Dim ", i, " (", round(tmp$eig[i,2],2), "%)"),
  51         ylab=paste0("Dim ", j, " (", round(tmp$eig[j,2],2), "%)"))
  52       ctrsavg <- t(apply(as.matrix(palette), 1,
  53         function(cl) c(mean(absc[cols==cl]), mean(ords[cols==cl]))))
  54             text(ctrsavg[,1], ctrsavg[,2], as.character(1:K), col=colors()[215])
  55             text(ctrs[,i], ctrs[,j], as.character(1:K), col=1)
  56             title(paste0("PCA ", i, "-", j, " / K=",K))
  57     }
  58   }
  59   # TODO:
  60   plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  61 }
  62
  63 plotClvz <- function(xem, alt=FALSE)
  64 {
  65   par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  66   if (alt) {
  67     resvisu <- clusvis(log(xem@bestResult@proba), xem@bestResult@parameters@proportions)
  68   } else {
  69     resvisu <- clusvisMixmod(xem)
  70   }
  71   plotDensityClusVisu(resvisu, positionlegend=NULL)
  72   plotDensityClusVisu(resvisu, add.obs=TRUE, positionlegend=NULL)
  73   # TODO:
  74   plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  75   plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  76 }
  77
  78 grlplot <- function(x, K, alt=FALSE) #x: data, K: nb classes
  79 {
  80   xem <- mixmodCluster(x, K, strategy=mixmodStrategy(nbTryInInit=500,nbTry=25))
  81   plotPCA(xem@results[[1]]@proba)
  82   plotClvz(xem, alt)
  83 }
  84 ```
  85
  86 ## Iris data
  87
  88 ```{r, results="show", include=TRUE, echo=TRUE}
  89 data(iris)
  90 x <- iris[,-5] #remove class info
  91 for (i in 3:5)
  92 {
  93   print(paste("Resultats en", i, "classes"))
  94   grlplot(x, i)
  95 }
  96 ```
  97
  98 ### finance dataset (from Rmixmod package)
  99 #
 100 #This dataset has two categorical attributes (the year and financial status), and four continuous ones.
 101 #
  102 #Warning: some classification probabilities are exactly equal to zero, so ClusVis cannot be used.
 103 #
 104 #```{r, results="show", include=TRUE, echo=TRUE}
 105 #data(finance)
 106 #x <- finance[,-2]
 107 #for (i in 3:5)
 108 #{
 109 #  print(paste("Resultats en", i, "classes"))
 110 #  grlplot(x, i, TRUE)
 111 #}
 112 #```
 113 #
 114 ### "Cathy dataset" (12 clusters)
 115 #
  116 #Warning: some classification probabilities are exactly equal to zero, so ClusVis cannot be used.
 117 #
 118 #```{r, results="hide", include=TRUE, echo=TRUE}
 119 #cathy12 <- as.matrix(read.table("data/probapostCatdbBlocAtrazine-K12.txt"))
 120 #resvisu <- clusvis(log(cathy12), prop = colMeans(cathy12))
 121 #par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
 122 #plotDensityClusVisu(resvisu, positionlegend = NULL)
 123 #plotDensityClusVisu(resvisu,  add.obs = TRUE, positionlegend = NULL)
 124 #plotPCA(cathy12)
 125 #```
 126 #
  127 ### Pima Indians diabetes
 128 #
 129 #[Source.](https://gist.github.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f)
 130 #
 131 #```{r, results="show", include=TRUE, echo=TRUE}
 132 #load("data/pimaData.rda")
 133 #for (i in 3:5)
 134 #{
 135 #  print(paste("Resultats en", i, "classes"))
 136 #  grlplot(x, i)
 137 #}
 138 #```
 139 #
 140 ### Breast cancer
 141 #
 142 #[Source.](http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+\%28diagnostic\%29)
 143 #
 144 #```{r, results="show", include=TRUE, echo=TRUE}
 145 #load("data/wdbc.rda")
 146 #for (i in 3:5)
 147 #{
 148 #  print(paste("Resultats en", i, "classes"))
 149 #  grlplot(x, i)
 150 #}
 151 #```
 152 #
 153 ### House-votes
 154 #
 155 #```{r, results="show", include=TRUE, echo=TRUE}
 156 #load("data/house-votes.rda")
 157 #for (i in 3:5)
 158 #{
 159 #  print(paste("Resultats en", i, "classes"))
 160 #  grlplot(x, i)
 161 #}
 162 #```