41497177de20a8c05c9e2bafbbe7bb51cd3dd9c6
[morpheus.git] / vignettes / report.Rmd
1 ---
2 title: morpheus...........
3
4 output:
5 pdf_document:
6 number_sections: true
7 toc_depth: 1
8 ---
9
10 ```{r setup, results="hide", include=FALSE}
# Default knitr options applied to every chunk of this report: code is echoed
# and included, results are cached (without lazy loading), output lines get no
# comment prefix, and figures are centred at full text width.
knitr::opts_chunk$set(
  echo = TRUE,
  include = TRUE,
  cache = TRUE,
  cache.lazy = FALSE,
  comment = "",
  out.width = "100%",
  fig.align = "center"
)
14 ```
15
16 0) Explain that we try to learn classification parameters in a non-EM way, using algebraic manipulations.
17 1) Model.
18 2) Algorithm (as in article)
19 3) Experiments: show package usage
20
21 # Expériences
22
23 ```{r, results="show", include=TRUE, echo=TRUE}
24 library(Rmixmod) #to get clustering probability matrix
25 library(ClusVis)
26 library(FactoMineR) #for PCA
27
# Plot classification probabilities on the first three principal axes.
#
# prob: numeric matrix of classification probabilities, one row per sample and
#   one column per class. The PCA must yield at least three components, since
#   the panels show the axis pairs 1-2, 1-3 and 2-3.
#
# The rows of the K x K identity matrix (class indicator vectors) are appended
# to 'prob' and projected as supplementary individuals. Each of the three
# panels shows:
#   - the samples, coloured by their most probable class;
#   - the average position of each class, labelled 1..K in colors()[215];
#   - the projected indicator vectors, labelled 1..K in black.
# The fourth cell of the 2x2 layout is left blank.
#
# Called for its side effect (plots); the graphical parameters that were in
# force before the call are restored on exit.
plotPCA <- function(prob)
{
  # par() returns the previous values of the parameters it sets: keep them so
  # that the 2x2 layout does not leak into the caller's subsequent plots.
  old_par <- par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  on.exit(par(old_par), add=TRUE)

  partition <- apply(prob, 1, which.max)
  n <- nrow(prob)
  K <- ncol(prob)
  palette <- rainbow(K, s=.5)
  cols <- palette[partition]
  labels <- as.character(seq_len(K))
  avg_col <- colors()[215]

  # TRUE/FALSE are spelled out: T and F are plain variables that can be reassigned.
  tmp <- PCA(rbind(prob, diag(K)), ind.sup=(n+1):(n+K), scale.unit=FALSE, graph=FALSE)
  if (NCOL(tmp$ind$coord) < 3)
  {
    stop("plotPCA needs at least 3 principal components, got ",
      NCOL(tmp$ind$coord), call.=FALSE)
  }
  scores <- tmp$ind$coord[,1:3] #samples coords, one row per sample
  ctrs <- tmp$ind.sup$coord #projections of indicator vectors, one row per class

  for (i in 1:2)
  {
    for (j in (i+1):3)
    {
      absc <- scores[,i]
      ords <- scores[,j]
      # Samples are convex combinations of the indicator vectors, so the
      # projected indicators lie on or outside the sample cloud: include them
      # in the axis limits, otherwise their labels may be clipped.
      xrange <- range(absc, ctrs[,i])
      yrange <- range(ords, ctrs[,j])
      plot(absc, ords, col=cols, pch="o",
        xlim=xrange, ylim=yrange,
        xlab=paste0("Dim ", i, " (", round(tmp$eig[i,2],2), "%)"),
        ylab=paste0("Dim ", j, " (", round(tmp$eig[j,2],2), "%)"))
      # Average position of each class (NaN, hence not drawn, for an empty class).
      ctrsavg <- t(vapply(seq_len(K),
        function(k) c(mean(absc[partition==k]), mean(ords[partition==k])),
        numeric(2)))
      text(ctrsavg[,1], ctrsavg[,2], labels, col=avg_col)
      text(ctrs[,i], ctrs[,j], labels, col=1)
      title(paste0("PCA ", i, "-", j, " / K=",K))
    }
  }
  # TODO: the fourth panel has no content yet; draw an empty placeholder.
  plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
}
62
# Draw the ClusVis representation of a fitted mixture model.
#
# xem: fitted model object. When alt is FALSE it is passed as is to
#   clusvisMixmod(); when alt is TRUE its slots bestResult@proba and
#   bestResult@parameters@proportions are read instead.
# alt: if TRUE, build the representation with clusvis() from the log of the
#   classification probabilities and the class proportions; if FALSE
#   (default), use clusvisMixmod(xem).
#
# Two density plots are drawn (without, then with, the observations) on a 2x2
# layout whose last two cells are left blank. Called for its side effect
# (plots); the graphical parameters that were in force before the call are
# restored on exit.
plotClvz <- function(xem, alt=FALSE)
{
  # par() returns the previous values of the parameters it sets: keep them so
  # that the 2x2 layout does not leak into the caller's subsequent plots.
  old_par <- par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  on.exit(par(old_par), add=TRUE)

  resvisu <- if (alt) {
    clusvis(log(xem@bestResult@proba), xem@bestResult@parameters@proportions)
  } else {
    clusvisMixmod(xem)
  }
  plotDensityClusVisu(resvisu, positionlegend=NULL)
  plotDensityClusVisu(resvisu, add.obs=TRUE, positionlegend=NULL)
  # TODO: the last two panels have no content yet; draw empty placeholders.
  for (panel in 1:2)
  {
    plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  }
}
77
# Fit a mixture model with K classes on x, then draw both representations:
# the PCA panels of the classification probabilities (plotPCA) and the ClusVis
# panels (plotClvz).
#
# x: data, K: nb classes, alt: forwarded to plotClvz()
grlplot <- function(x, K, alt=FALSE)
{
  strat <- mixmodStrategy(nbTryInInit=500, nbTry=25)
  fit <- mixmodCluster(x, K, strategy=strat)
  posterior <- fit@results[[1]]@proba
  plotPCA(posterior)
  plotClvz(fit, alt)
}
84 ```
85
86 ## Iris data
87
88 ```{r, results="show", include=TRUE, echo=TRUE}
# Run the full plotting pipeline on the iris measurements for 3, 4 and 5
# classes; the fifth column (class info) is left out of the data.
data(iris)
x <- iris[, 1:4]
for (i in 3:5) {
  print(paste("Resultats en", i, "classes"))
  grlplot(x, i)
}
96 ```
97
98 ### finance dataset (from Rmixmod package)
99 #
100 #This dataset has two categorical attributes (the year and financial status), and four continuous ones.
101 #
102 #Warning: some classification probabilities are exactly equal to zero, so we cannot use ClusVis.
103 #
104 #```{r, results="show", include=TRUE, echo=TRUE}
105 #data(finance)
106 #x <- finance[,-2]
107 #for (i in 3:5)
108 #{
109 # print(paste("Resultats en", i, "classes"))
110 # grlplot(x, i, TRUE)
111 #}
112 #```
113 #
114 ### "Cathy dataset" (12 clusters)
115 #
116 #Warning: some classification probabilities are exactly equal to zero, so we cannot use ClusVis.
117 #
118 #```{r, results="hide", include=TRUE, echo=TRUE}
119 #cathy12 <- as.matrix(read.table("data/probapostCatdbBlocAtrazine-K12.txt"))
120 #resvisu <- clusvis(log(cathy12), prop = colMeans(cathy12))
121 #par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
122 #plotDensityClusVisu(resvisu, positionlegend = NULL)
123 #plotDensityClusVisu(resvisu, add.obs = TRUE, positionlegend = NULL)
124 #plotPCA(cathy12)
125 #```
126 #
127 ### Pima Indians diabetes
128 #
129 #[Source.](https://gist.github.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f)
130 #
131 #```{r, results="show", include=TRUE, echo=TRUE}
132 #load("data/pimaData.rda")
133 #for (i in 3:5)
134 #{
135 # print(paste("Resultats en", i, "classes"))
136 # grlplot(x, i)
137 #}
138 #```
139 #
140 ### Breast cancer
141 #
142 #[Source.](http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+\%28diagnostic\%29)
143 #
144 #```{r, results="show", include=TRUE, echo=TRUE}
145 #load("data/wdbc.rda")
146 #for (i in 3:5)
147 #{
148 # print(paste("Resultats en", i, "classes"))
149 # grlplot(x, i)
150 #}
151 #```
152 #
153 ### House-votes
154 #
155 #```{r, results="show", include=TRUE, echo=TRUE}
156 #load("data/house-votes.rda")
157 #for (i in 3:5)
158 #{
159 # print(paste("Resultats en", i, "classes"))
160 # grlplot(x, i)
161 #}
162 #```