39453227c5847f0b4341de18287ae5d773492e3b
[valse.git] / pkg / R / initSmallEM.R
#' initSmallEM
#'
#' Initialization of the EM algorithm.
#'
#' The observations are split into \code{k} groups by hierarchical clustering
#' of \code{cbind(X, Y)}. Within each group a least-squares regression of
#' \code{Y} on \code{X} (via a Moore-Penrose generalized inverse) gives the
#' initial regression parameter, the covariance is set to the identity, and
#' the proportions are the relative group sizes. A short run of \code{EMGLLF}
#' is then performed for each repetition and the repetition with the smallest
#' returned \code{llh} is kept.
#'
#' @param k number of components
#' @param X matrix of covariates (of size n*p)
#' @param Y matrix of responses (of size n*m)
#' @param fast boolean to enable or not the C function call (forwarded
#'   unchanged to \code{EMGLLF})
#'
#' @return a list with phiInit (the regression parameter reparametrized),
#'   rhoInit (the covariance parameter reparametrized), piInit (the proportion
#'   parameter in the mixture model), gamInit (the conditional expectation)
#'
#' @importFrom stats cutree dist hclust runif
#'
#' @export
initSmallEM <- function(k, X, Y, fast)
{
  n <- nrow(X)
  p <- ncol(X)
  m <- ncol(Y)
  nIte <- 20
  miniInit <- 10
  maxiInit <- 11

  Zinit1 <- array(0, dim = c(n, nIte))
  betaInit1 <- array(0, dim = c(p, m, k, nIte))
  sigmaInit1 <- array(0, dim = c(m, m, k, nIte))
  phiInit1 <- array(0, dim = c(p, m, k, nIte))
  rhoInit1 <- array(0, dim = c(m, m, k, nIte))
  logGam <- matrix(0, n, k)
  piInit1 <- matrix(0, nIte, k)
  gamInit1 <- array(0, dim = c(n, k, nIte))
  LLFinit1 <- rep(NA_real_, nIte)

  # The clustering depends only on X, Y and k, so it is computed once instead
  # of rebuilding the O(n^2) distance matrix at every repetition.
  Z <- cutree(hclust(dist(cbind(X, Y))), k)

  for (repet in seq_len(nIte))
  {
    Zinit1[, repet] <- Z

    for (r in seq_len(k))
    {
      Z_indice <- which(Z == r) # indices of the observations in cluster r
      # drop = FALSE keeps matrices even for a single-row cluster (or when
      # p == 1 / m == 1), so one formula covers every case.
      Xr <- X[Z_indice, , drop = FALSE]
      Yr <- Y[Z_indice, , drop = FALSE]
      betaInit1[, , r, repet] <- MASS::ginv(crossprod(Xr)) %*% crossprod(Xr, Yr)
      sigmaInit1[, , r, repet] <- diag(m)
      phiInit1[, , r, repet] <- betaInit1[, , r, repet] #/ sigmaInit1[,,r,repet]
      rhoInit1[, , r, repet] <- solve(matrix(sigmaInit1[, , r, repet], m, m))
      piInit1[repet, r] <- mean(Z == r)
    }

    # Log of the unnormalized weights pi_r * det(rho_r) * exp(-0.5 * ||.||^2),
    # computed for all observations at once.
    for (r in seq_len(k))
    {
      # Rebuild explicit matrices: array slices lose their dimensions when
      # p == 1 or m == 1, which would break det() and the matrix products.
      rho_r <- matrix(rhoInit1[, , r, repet], m, m)
      phi_r <- matrix(phiInit1[, , r, repet], p, m)
      residuals_r <- Y %*% rho_r - X %*% phi_r
      logGam[, r] <- log(piInit1[repet, r] * det(rho_r)) -
        0.5 * rowSums(residuals_r^2)
    }
    # Subtract the row-wise maximum before exponentiating: the normalized
    # weights are unchanged, but at least one term per row equals 1, so the
    # row sum can no longer underflow to zero (former division by zero).
    Gam <- exp(logGam - apply(logGam, 1, max))
    gamInit1[, , repet] <- Gam / rowSums(Gam)

    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
      gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
      eps = 1e-04, fast)
    LLFinit1[repet] <- init_EMG$llh
  }

  b <- which.min(LLFinit1)
  # which.min() returns integer(0) when every llh is NA/NaN; fall back to the
  # first repetition rather than returning empty arrays.
  if (length(b) == 0)
    b <- 1L

  phiInit <- phiInit1[, , , b]
  rhoInit <- rhoInit1[, , , b]
  piInit <- piInit1[b, ]
  gamInit <- gamInit1[, , b]

  list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit)
}