Commit | Line | Data |
---|---|---|
3d5b5060 BA |
1 | --- |
2 | title: morpheus........... | |
3 | ||
4 | output: | |
5 | pdf_document: | |
6 | number_sections: true | |
7 | toc_depth: 1 | |
8 | --- | |
9 | ||
10 | ```{r setup, results="hide", include=FALSE} | |
11 | knitr::opts_chunk$set(echo = TRUE, include = TRUE, | |
12 | cache = TRUE, comment="", cache.lazy = FALSE, | |
13 | out.width = "100%", fig.align = "center") | |
14 | ``` | |
15 | ||
16 | 0) Explain that we try to learn the classification parameters in a non-EM way, using algebraic manipulations. | |
17 | 1) Model. | |
18 | 2) Algorithm (as in article) | |
19 | 3) Experiments: show package usage | |
20 | ||
21 | # Expériences | |
22 | ||
23 | ```{r, results="show", include=TRUE, echo=TRUE} | |
24 | library(Rmixmod) #to get clustering probability matrix | |
25 | library(ClusVis) | |
26 | library(FactoMineR) #for PCA | |
27 | ||
# Draw PCA views of a matrix of classification probabilities.
#
# prob: numeric matrix with one row per observation and one column per
#       cluster. Each observation is assigned to the cluster holding its
#       largest probability (which.max on each row).
#
# The K rows of diag(K) (one indicator vector per cluster) are appended to
# prob and declared as supplementary individuals (ind.sup) in the PCA() call.
# The planes 1-2, 1-3 and 2-3 are drawn on a 2x2 layout; the fourth panel is
# left blank. In each panel:
#   - observations are drawn as "o", coloured by assigned cluster;
#   - digits in colors()[215] mark the mean position of each cluster's
#     observations;
#   - black digits mark the projected indicator vectors.
#
# Called for its plotting side effect. Stops with an error when the PCA
# yields fewer than three components.
plotPCA <- function(prob)
{
  # Install the 2x2 layout and put the caller's settings back on exit.
  old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 2), mgp = c(2, 1, 0))
  on.exit(par(old_par), add = TRUE)

  n <- nrow(prob)
  K <- ncol(prob)
  partition <- apply(prob, 1, which.max)
  cluster_cols <- rainbow(K, s = .5)
  point_cols <- cluster_cols[partition]
  mean_label_col <- colors()[215]
  cluster_labels <- as.character(seq_len(K))

  pca <- PCA(rbind(prob, diag(K)), ind.sup = (n + 1):(n + K),
    scale.unit = FALSE, graph = FALSE)
  if (ncol(pca$ind$coord) < 3)
    stop("plotPCA() needs at least 3 principal components", call. = FALSE)
  scores <- pca$ind$coord[, 1:3] # observation coordinates, one row each
  ctrs <- pca$ind.sup$coord # projections of the indicator vectors

  for (i in 1:2)
  {
    for (j in (i + 1):3)
    {
      absc <- scores[, i]
      ords <- scores[, j]
      # The limits also cover the indicator-vector projections, otherwise
      # their labels can fall outside the plotting region and be clipped.
      xrange <- range(absc, ctrs[, i])
      yrange <- range(ords, ctrs[, j])
      plot(absc, ords, col = point_cols, pch = "o",
        xlim = xrange, ylim = yrange,
        xlab = paste0("Dim ", i, " (", round(pca$eig[i, 2], 2), "%)"),
        ylab = paste0("Dim ", j, " (", round(pca$eig[j, 2], 2), "%)"))
      # Mean position of the observations assigned to each cluster; an empty
      # cluster gives NaN coordinates.
      ctrsavg <- vapply(seq_len(K), function(k) {
        members <- partition == k
        c(mean(absc[members]), mean(ords[members]))
      }, numeric(2))
      text(ctrsavg[1, ], ctrsavg[2, ], cluster_labels, col = mean_label_col)
      text(ctrs[, i], ctrs[, j], cluster_labels, col = 1)
      title(paste0("PCA ", i, "-", j, " / K=", K))
    }
  }
  # TODO: the fourth panel is only filled with an empty plot for now.
  plot(0, xaxt = "n", yaxt = "n", xlab = "", ylab = "", col = "white", bty = "n")
}
62 | ||
# Draw the ClusVis density views of a fitted Rmixmod clustering.
#
# xem: object returned by mixmodCluster(); its bestResult slot is read when
#      alt is TRUE.
# alt: if TRUE, build the visualisation with clusvis() from the log of the
#      best result's probability matrix and its proportions; otherwise (the
#      default) call clusvisMixmod() on the fitted object.
#
# The density plot is drawn twice on a 2x2 layout (the second time with
# add.obs = TRUE); the two remaining panels are filled with empty plots.
# Called for its plotting side effect.
plotClvz <- function(xem, alt=FALSE)
{
  # Install the 2x2 layout and put the caller's settings back on exit.
  old_par <- par(mfrow = c(2, 2), mar = c(4, 4, 2, 2), mgp = c(2, 1, 0))
  on.exit(par(old_par), add = TRUE)

  resvisu <- if (alt)
  {
    best <- xem@bestResult
    clusvis(log(best@proba), best@parameters@proportions)
  }
  else
  {
    clusvisMixmod(xem)
  }

  plotDensityClusVisu(resvisu, positionlegend = NULL)
  plotDensityClusVisu(resvisu, add.obs = TRUE, positionlegend = NULL)

  # TODO: the last two panels are only filled with empty plots for now.
  blank_panel <- function()
    plot(0, xaxt = "n", yaxt = "n", xlab = "", ylab = "", col = "white", bty = "n")
  blank_panel()
  blank_panel()
}
77 | ||
# Fit a mixture model with Rmixmod and draw both visualisations of it.
#
# x:   data to cluster.
# K:   number of classes, passed to mixmodCluster().
# alt: forwarded to plotClvz().
grlplot <- function(x, K, alt=FALSE)
{
  search_strategy <- mixmodStrategy(nbTryInInit = 500, nbTry = 25)
  fit <- mixmodCluster(x, K, strategy = search_strategy)
  # PCA view of the probability matrix of the first stored result.
  membership <- fit@results[[1]]@proba
  plotPCA(membership)
  # ClusVis view of the same fit.
  plotClvz(fit, alt)
}
84 | ``` | |
85 | ||
86 | ## Iris data | |
87 | ||
88 | ```{r, results="show", include=TRUE, echo=TRUE} | |
# Iris experiment: run grlplot() for 3, 4 and 5 classes.
data(iris)
# Drop the fifth column so that the class information is not used.
x <- iris[, -5]
for (i in 3:5)
{
  header <- paste("Resultats en", i, "classes")
  print(header)
  grlplot(x, i)
}
96 | ``` | |
97 | ||
98 | ### finance dataset (from Rmixmod package) | |
99 | # | |
100 | #This dataset has two categorical attributes (the year and financial status), and four continuous ones. | |
101 | # | |
102 | #Warning: some classification probabilities are exactly equal to zero, so we cannot use ClusVis. | |
103 | # | |
104 | #```{r, results="show", include=TRUE, echo=TRUE} | |
105 | #data(finance) | |
106 | #x <- finance[,-2] | |
107 | #for (i in 3:5) | |
108 | #{ | |
109 | # print(paste("Resultats en", i, "classes")) | |
110 | # grlplot(x, i, TRUE) | |
111 | #} | |
112 | #``` | |
113 | # | |
114 | ### "Cathy dataset" (12 clusters) | |
115 | # | |
116 | #Warning: some classification probabilities are exactly equal to zero, so we cannot use ClusVis. | |
117 | # | |
118 | #```{r, results="hide", include=TRUE, echo=TRUE} | |
119 | #cathy12 <- as.matrix(read.table("data/probapostCatdbBlocAtrazine-K12.txt")) | |
120 | #resvisu <- clusvis(log(cathy12), prop = colMeans(cathy12)) | |
121 | #par(mfrow=c(2,2), mar=c(4,4,2,2), mgp=c(2,1,0)) | |
122 | #plotDensityClusVisu(resvisu, positionlegend = NULL) | |
123 | #plotDensityClusVisu(resvisu, add.obs = TRUE, positionlegend = NULL) | |
124 | #plotPCA(cathy12) | |
125 | #``` | |
126 | # | |
127 | ### Pima Indians diabetes | |
128 | # | |
129 | #[Source.](https://gist.github.com/ktisha/c21e73a1bd1700294ef790c56c8aec1f) | |
130 | # | |
131 | #```{r, results="show", include=TRUE, echo=TRUE} | |
132 | #load("data/pimaData.rda") | |
133 | #for (i in 3:5) | |
134 | #{ | |
135 | # print(paste("Resultats en", i, "classes")) | |
136 | # grlplot(x, i) | |
137 | #} | |
138 | #``` | |
139 | # | |
140 | ### Breast cancer | |
141 | # | |
142 | #[Source.](http://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+\%28diagnostic\%29) | |
143 | # | |
144 | #```{r, results="show", include=TRUE, echo=TRUE} | |
145 | #load("data/wdbc.rda") | |
146 | #for (i in 3:5) | |
147 | #{ | |
148 | # print(paste("Resultats en", i, "classes")) | |
149 | # grlplot(x, i) | |
150 | #} | |
151 | #``` | |
152 | # | |
153 | ### House-votes | |
154 | # | |
155 | #```{r, results="show", include=TRUE, echo=TRUE} | |
156 | #load("data/house-votes.rda") | |
157 | #for (i in 3:5) | |
158 | #{ | |
159 | # print(paste("Resultats en", i, "classes")) | |
160 | # grlplot(x, i) | |
161 | #} | |
162 | #``` |