'update'
[morpheus.git] / vignettes / report.Rmd
CommitLineData
3d5b5060
BA
1---
2title: morpheus...........
3
4output:
5 pdf_document:
6 number_sections: true
7 toc_depth: 1
8---
9
10```{r setup, results="hide", include=FALSE}
# Default options applied to every chunk of the report: code is echoed and
# included, results are cached (without lazy loading), printed output gets an
# empty prefix, and figures are centered at full width.
knitr::opts_chunk$set(
  echo = TRUE,
  include = TRUE,
  cache = TRUE,
  cache.lazy = FALSE,
  comment = "",
  out.width = "100%",
  fig.align = "center"
)
14```
15
0) Explain that we try to learn the classification parameters in a non-EM way, using algebraic manipulations.
171) Model.
182) Algorithm (as in article)
193) Experiments: show package usage
20
21# Expériences
22
23```{r, results="show", include=TRUE, echo=TRUE}
24library(Rmixmod) #to get clustering probability matrix
25library(ClusVis)
26library(FactoMineR) #for PCA
27
# Draw pairwise scatter plots of the first three principal components of a
# classification-probability matrix on a 2x2 grid.
#
# prob: matrix with one row per observation and one column per class; each
#       observation is assigned to the class holding the largest value in its
#       row (presumably posterior probabilities -- see the caller).
#
# For each pair of dimensions (1-2, 1-3, 2-3) the function plots:
#   - the observations, colored by assigned class;
#   - the class number at the average position of the observations assigned to
#     that class;
#   - the class number (in black) at the projection of the corresponding
#     indicator vector (row of the K x K identity matrix).
# The fourth panel of the grid is left empty.
#
# Called for its side effect (plots); returns NULL invisibly. The graphical
# parameters in force before the call are restored on exit.
plotPCA <- function(prob)
{
  old_par <- par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  on.exit(par(old_par), add=TRUE)

  partition <- apply(prob, 1, which.max)
  n <- nrow(prob)
  K <- ncol(prob)
  class_ids <- seq_len(K)
  class_labels <- as.character(class_ids)
  class_colors <- rainbow(K, s=.5) #not named 'palette': would mask grDevices::palette
  cols <- class_colors[partition]
  avg_color <- colors()[215]

  # The K indicator vectors are appended as supplementary individuals: they do
  # not influence the PCA, they are only projected onto its axes.
  tmp <- PCA(rbind(prob, diag(K)), ind.sup=(n+1):(n+K), scale.unit=FALSE, graph=FALSE)
  nb_dims <- ncol(tmp$ind$coord)
  if (nb_dims < 3)
  {
    stop("plotPCA needs at least 3 principal components, but PCA returned ",
      nb_dims, call.=FALSE)
  }
  scores <- tmp$ind$coord[,1:3] #samples coords, by rows
  ctrs <- tmp$ind.sup$coord #projections of indicator vectors, one row per class

  for (i in 1:2)
  {
    for (j in (i+1):3)
    {
      absc <- scores[,i]
      ords <- scores[,j]
      plot(absc, ords, col=cols, pch="o",
        xlim=range(absc), ylim=range(ords),
        xlab=paste0("Dim ", i, " (", round(tmp$eig[i,2],2), "%)"),
        ylab=paste0("Dim ", j, " (", round(tmp$eig[j,2],2), "%)"))
      # Average position of the observations assigned to each class (K x 2);
      # a class with no observation yields NaN and is simply not drawn.
      ctrsavg <- t(vapply(class_ids, function(k) {
        in_class <- (partition == k)
        c(mean(absc[in_class]), mean(ords[in_class]))
      }, numeric(2)))
      text(ctrsavg[,1], ctrsavg[,2], class_labels, col=avg_color)
      text(ctrs[,i], ctrs[,j], class_labels, col=1)
      title(paste0("PCA ", i, "-", j, " / K=",K))
    }
  }

  # TODO: the fourth panel is currently an empty placeholder completing the grid
  plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  invisible(NULL)
}
62
# Draw the ClusVis density plots of a fitted mixture model on a 2x2 grid.
#
# xem: fitted clustering object; the code reads its @bestResult@proba and
#      @bestResult@parameters@proportions slots (presumably the output of
#      Rmixmod::mixmodCluster, as produced in grlplot).
# alt: if FALSE (default), the visualisation is built by clusvisMixmod(xem);
#      if TRUE, it is built by clusvis() from the logarithm of the
#      classification probabilities and the class proportions.
#
# Two panels are drawn (without, then with the observations); the two
# remaining panels of the grid are left empty.
#
# Called for its side effect (plots); returns NULL invisibly. The graphical
# parameters in force before the call are restored on exit.
plotClvz <- function(xem, alt=FALSE)
{
  old_par <- par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
  on.exit(par(old_par), add=TRUE)

  resvisu <- if (alt)
  {
    best <- xem@bestResult
    clusvis(log(best@proba), best@parameters@proportions)
  }
  else
  {
    clusvisMixmod(xem)
  }

  plotDensityClusVisu(resvisu, positionlegend=NULL)
  plotDensityClusVisu(resvisu, add.obs=TRUE, positionlegend=NULL)

  # TODO: the last two panels are currently empty placeholders completing the grid
  for (panel in 1:2)
  {
    plot(0, xaxt="n", yaxt="n", xlab="", ylab="", col="white", bty="n")
  }
  invisible(NULL)
}
77
# Cluster the data with Rmixmod, then draw both diagnostics: the PCA of the
# classification probabilities (plotPCA) and the ClusVis plots (plotClvz).
#
# x:   data to cluster
# K:   number of classes
# alt: forwarded to plotClvz
grlplot <- function(x, K, alt=FALSE)
{
  strategy <- mixmodStrategy(nbTryInInit=500, nbTry=25)
  fit <- mixmodCluster(x, K, strategy=strategy)
  posterior <- fit@results[[1]]@proba
  plotPCA(posterior)
  plotClvz(fit, alt)
}
84```
85
86## Iris data
87
88```{r, results="show", include=TRUE, echo=TRUE}
# Iris experiment: drop the class column, then cluster the remaining
# measurements into 3, 4 and 5 classes and draw the plots for each.
data(iris)
x <- iris[, -5] # 5th column holds the class info
for (i in seq(3, 5))
{
  print(paste("Resultats en", i, "classes"))
  grlplot(x, i)
}
96```
97
98### finance dataset (from Rmixmod package)
99#
100#This dataset has two categorical attributes (the year and financial status), and four continuous ones.
101#
#Warning: some classification probabilities are exactly equal to zero, so ClusVis cannot be used.
103#
104#```{r, results="show", include=TRUE, echo=TRUE}
105#data(finance)
106#x <- finance[,-2]
107#for (i in 3:5)
108#{
109# print(paste("Resultats en", i, "classes"))
110# grlplot(x, i, TRUE)
111#}
112#```
113#
114### "Cathy dataset" (12 clusters)
115#
#Warning: some classification probabilities are exactly equal to zero, so ClusVis cannot be used.
117#
118#```{r, results="hide", include=TRUE, echo=TRUE}
119#cathy12 <- as.matrix(read.table("data/probapostCatdbBlocAtrazine-K12.txt"))
120#resvisu <- clusvis(log(cathy12), prop = colMeans(cathy12))
121#par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0))
122#plotDensityClusVisu(resvisu, positionlegend = NULL)
123#plotDensityClusVisu(resvisu, add.obs = TRUE, positionlegend = NULL)
124#plotPCA(cathy12)
125#```
126#
### Pima Indians diabetes
128#
129#[Source.](https://gist.github.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f)
130#
131#```{r, results="show", include=TRUE, echo=TRUE}
132#load("data/pimaData.rda")
133#for (i in 3:5)
134#{
135# print(paste("Resultats en", i, "classes"))
136# grlplot(x, i)
137#}
138#```
139#
140### Breast cancer
141#
142#[Source.](http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+\%28diagnostic\%29)
143#
144#```{r, results="show", include=TRUE, echo=TRUE}
145#load("data/wdbc.rda")
146#for (i in 3:5)
147#{
148# print(paste("Resultats en", i, "classes"))
149# grlplot(x, i)
150#}
151#```
152#
153### House-votes
154#
155#```{r, results="show", include=TRUE, echo=TRUE}
156#load("data/house-votes.rda")
157#for (i in 3:5)
158#{
159# print(paste("Resultats en", i, "classes"))
160# grlplot(x, i)
161#}
162#```