#-----------------------------------------------------------------------
#' Initialize the parameters in a basic way (zero for the conditional mean,
#' uniform for weights, identity for covariance matrices, and uniformly distributed for the clustering)
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
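#' @examples
#' ## illustrative call with arbitrary sizes: 50 observations, 5 covariates,
#' ## a 3-dimensional response and 2 clusters
#' init = basic_Init_Parameters(n = 50, p = 5, m = 3, k = 2)
#' dim(init$phiInit) # 5 3 2
#' rowSums(init$gamInit) # every row of gamInit sums to 1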
#-----------------------------------------------------------------------
basic_Init_Parameters = function(n,p,m,k)
{
  phiInit = array(0, dim=c(p,m,k))

  piInit = (1./k)*rep.int(1,k)

  rhoInit = array(0, dim=c(m,m,k))
  for(i in 1:k)
    rhoInit[,,i] = diag(m)

  #random hard assignment softened to 0.9/0.1; after normalization every row sums to one
  gamInit = 0.1*array(1, dim=c(n,k))
  R = sample(1:k, n, replace=TRUE)
  for(i in 1:n)
    gamInit[i,R[i]] = 0.9
  gamInit = gamInit/sum(gamInit[1,])

  return (list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
}
#' @param pi weight parameters (of size K*size(gridLambda))
#'
#' @return a list with updated B1, B2, glambda, rho and pi, and ind the vector of indices
#' of the discarded models.
#' @export
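#' @examples
#' ## illustrative sketch with small arbitrary arrays: models 1 and 2 share the same B1 slice
#' B1 = array(c(1,1, 1,1, 2,2), dim = c(1,2,3))
#' rho = array(1, dim = c(2,2,2,3)); pi = matrix(1/2, 2, 3)
#' res = discardSimilarModels(B1, B1, glambda = c(0.1,0.2,0.3), rho, pi)
#' res$ind # index of the duplicated model that was dropped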
discardSimilarModels = function(B1,B2,glambda,rho,pi)
{
  ind = c()
  if (length(glambda) > 1)
  {
    for (l in 2:length(glambda))
    {
      for (ll in 1:(l-1))
      {
        #model l duplicates an earlier model if its B1 slice is identical
        if (all(B1[,,l] == B1[,,ll]))
          ind = c(ind, l)
      }
    }
  }
  ind = unique(ind)
  if (length(ind) > 0)
  {
    B1 = B1[,,-ind]
    glambda = glambda[-ind]
    B2 = B2[,,-ind]
    rho = rho[,,,-ind]
    pi = pi[,-ind]
  }

  return (list(B1=B1,B2=B2,glambda=glambda,rho=rho,pi=pi,ind=ind))
}
#' Generate a sample of (X,Y) of size n
#' @param covX covariance for covariates (of size p*p)
#' @param covY covariance for the response vector (of size m*m*K)
#' @param pi proportion for each cluster
#' @param beta regression matrix
#' @param n sample size
#'
#' @return list with X and Y
#' @export
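#' @examples
#' \dontrun{
#' ## illustrative sketch (arbitrary values); requires the mvtnorm and MASS packages
#' k = 2; p = 3; m = 2
#' covY = array(0, dim = c(m,m,k)); for (r in 1:k) covY[,,r] = diag(m)
#' beta = array(rnorm(p*m*k), dim = c(p,m,k))
#' io = generateIO(diag(p), covY, pi = rep(1/k,k), beta, n = 50)
#' dim(io$X); dim(io$Y) # 50 x 3 and 50 x 2
#' }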
#-----------------------------------------------------------------------
generateIO = function(covX, covY, pi, beta, n)
{
  p = dim(covX)[1]

  m = dim(covY)[1]
  k = dim(covY)[3]

  Y = matrix(0,n,m)
  require(mvtnorm) #rmvnorm: multivariate normal covariates
  X = rmvnorm(n, mean = rep(0,p), sigma = covX)

  require(MASS) #mvrnorm: simulate from a multivariate normal distribution
  #draw the cluster of each observation according to pi, then its response
  class = sample(1:k, n, prob = pi, replace = TRUE)
  for (i in 1:n)
  {
    r = class[i]
    BXir = rep(0,m)
    for (mm in 1:m)
      BXir[mm] = X[i,] %*% beta[,mm,r]
    Y[i,] = mvrnorm(1, BXir, covY[,,r])
  }

  return (list(X=X,Y=Y))
}
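#' Generate a default sample of (X,Y) of size n: identity covariances, uniform
#' cluster proportions, and a regression matrix with uniformly drawn nonzero entries
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#'
#' @return list with X and Y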
#-----------------------------------------------------------------------
generateIOdefault = function(n, p, m, k)
{
  covX = diag(p)
  covY = array(0, dim=c(m,m,k))
  for(r in 1:k)
  {
    covY[,,r] = diag(m)
  }

  pi = rep(1./k,k)

  beta = array(0, dim=c(p,m,k))
  for(j in 1:p)
  {
    nonZeroCount = ceiling(m * runif(1))
    beta[j,1:nonZeroCount,] = matrix(runif(nonZeroCount*k), ncol=k)
  }

  sample_IO = generateIO(covX, covY, pi, beta, n)
  return (list(X=sample_IO$X,Y=sample_IO$Y))
}
#' Construct the data-driven grid for the regularization parameters used for the Lasso estimator
#' @param phiInit value for phi
#' @param rhoInit value for rho
#' @param piInit value for pi
#' @param gamInit value for gamma
#' @param X matrix of covariates
#' @param Y matrix of responses
#' @param gamma power on the proportions in the penalty
#' @param mini minimum number of iterations in EM algorithm
#' @param maxi maximum number of iterations in EM algorithm
#' @param tau threshold to stop EM algorithm
#' @return the grid of regularization parameters
#' @export
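#' @examples
#' \dontrun{
#' ## illustrative sketch (X, Y and the tuning values are placeholders);
#' ## requires the compiled EMGLLF routine
#' init = initSmallEM(k = 2, X, Y, tau = 1e-4)
#' grid = gridLambda(init$phiInit, init$rhoInit, init$piInit, init$gamInit,
#'   X, Y, gamma = 1, mini = 10, maxi = 100, tau = 1e-4)
#' }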
#-----------------------------------------------------------------------
gridLambda = function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, maxi, tau)
{
  n = nrow(X)
  p = dim(phiInit)[1]
  m = dim(phiInit)[2]
  k = dim(phiInit)[3]

  list_EMG = .Call("EMGLLF",phiInit,rhoInit,piInit,gamInit,mini,maxi,1,0,X,Y,tau)

  grid = array(0, dim=c(p,m,k))
  for (i in 1:p)
  {
    for (j in 1:m)
      grid[i,j,] = abs(list_EMG$S[i,j,]) / (n*list_EMG$pi^gamma)
  }
  #keep each candidate value once, as a plain vector of lambdas
  grid = unique(as.vector(grid))
  grid = grid[grid <= 1]

  return(grid)
}
#' @param phi regression coefficients (array of size p*m*k)
#' @param thresh threshold to say a coefficient is equal to zero
#'
#' @return a list with A, a matrix with relevant indices (size = p*m) and B, a
#' matrix with irrelevant indices (size = p*m)
#' @export
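#' @examples
#' ## illustrative sketch: arbitrary phi with 2 covariates, 3 responses, 2 clusters
#' phi = array(0, dim = c(2,3,2))
#' phi[1,2,] = 0.5
#' indicesSelection(phi)$A # row 1 marks response 2 as relevant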
indicesSelection = function(phi, thresh = 1e-6)
{
  dim_phi = dim(phi)
  p = dim_phi[1]
  m = dim_phi[2]

  A = matrix(0, p, m)
  B = matrix(0, p, m)

  for(j in 1:p)
  {
    cpt1 = 0
    cpt2 = 0
    for(mm in 1:m)
    {
      #coefficient (j,mm) is relevant if it exceeds the threshold in at least one cluster
      if(max(abs(phi[j,mm,])) > thresh)
      {
        cpt1 = cpt1 + 1
        A[j,cpt1] = mm
      } else
      {
        cpt2 = cpt2+1
        B[j,cpt2] = mm
      }
    }
  }
  return (list(A=A,B=B))
}
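#' Initialize the EM algorithm: build 20 candidate parameter sets from a clustering
#' of (X,Y), run a few EM iterations on each, and keep the one with the highest
#' final log-likelihood
#' @param k number of clusters
#' @param X matrix of covariates
#' @param Y matrix of responses
#' @param tau threshold to stop the EM algorithm
#'
#' @return a list with phiInit, rhoInit, piInit and gamInit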
#' @export
initSmallEM = function(k,X,Y,tau)
{
  n = nrow(Y)
  m = ncol(Y)
  p = ncol(X)

  Zinit1 = matrix(0, n, 20)
  betaInit1 = array(0, dim=c(p,m,k,20))
  sigmaInit1 = array(0, dim = c(m,m,k,20))
  phiInit1 = array(0, dim = c(p,m,k,20))
  rhoInit1 = array(0, dim = c(m,m,k,20))
  piInit1 = matrix(0,20,k)
  gamInit1 = array(0, dim=c(n,k,20))
  Gam = matrix(0, n, k)
  LLFinit1 = list()

  require(MASS) #ginv: Moore-Penrose generalized inverse of a matrix
  require(mclust) #Mclust: model-based clustering
  for(repet in 1:20)
  {
    #cluster the joint data (X,Y) into k groups to get an initial partition
    clusters = Mclust(matrix(c(X,Y),nrow=n),k)
    Zinit1[,repet] = clusters$classification

    for(r in 1:k)
    {
      Z = Zinit1[,repet]
      Z_bin = vec_bin(Z,r)
      Z_vec = Z_bin$Z #0/1 vector, 1 where Z==r
      Z_indice = Z_bin$indice #indices where Z==r

      #least-squares estimate of beta on the observations assigned to cluster r
      betaInit1[,,r,repet] =
        ginv(t(X[Z_indice,])%*%X[Z_indice,])%*%t(X[Z_indice,])%*%Y[Z_indice,]
      sigmaInit1[,,r,repet] = diag(m)
      rhoInit1[,,r,repet] = solve(sigmaInit1[,,r,repet])
      phiInit1[,,r,repet] = betaInit1[,,r,repet] %*% rhoInit1[,,r,repet]
      piInit1[repet,r] = sum(Z_vec)/n
    }

    for(i in 1:n)
    {
      for(r in 1:k)
      {
        dotProduct = tcrossprod(Y[i,]%*%rhoInit1[,,r,repet] - X[i,]%*%phiInit1[,,r,repet])
        Gam[i,r] = piInit1[repet,r]*det(rhoInit1[,,r,repet])*exp(-0.5*dotProduct)
      }
      sumGamI = sum(Gam[i,])
      gamInit1[i,,repet] = Gam[i,] / sumGamI
    }

    miniInit = 10
    maxiInit = 11

    new_EMG = .Call("EMGLLF",phiInit1[,,,repet],rhoInit1[,,,repet],piInit1[repet,],
      gamInit1[,,repet],miniInit,maxiInit,1,0,X,Y,tau)
    LLFEessai = new_EMG$LLF
    LLFinit1[repet] = LLFEessai[length(LLFEessai)]
  }

  #keep the initialization whose short EM run reached the highest log-likelihood
  b = which.max(unlist(LLFinit1))
  phiInit = phiInit1[,,,b]
  rhoInit = rhoInit1[,,,b]
  piInit = piInit1[b,]
  gamInit = gamInit1[,,b]

  return (list(phiInit=phiInit, rhoInit=rhoInit, piInit=piInit, gamInit=gamInit))
}
#' Keep at most one model per dimension among a collection of models: the one maximizing
#' the likelihood if there were several with the same dimension
#'
#' @param LLF a matrix whose first column contains the likelihoods of several models and
#' whose second column contains the dimensions of the corresponding models.
#'
#' @return a list with indices, a vector of indices of the selected models,
#' and D1, a vector of the corresponding dimensions
#' @export
#'
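#' @examples
#' ## illustrative sketch with arbitrary values: four models, two of which share dimension 5
#' LLF = cbind(c(-120, -100, -90, -80), c(3, 5, 5, 8))
#' modelSelection(LLF) # keeps models 1, 3 and 4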
modelSelection = function(LLF)
{
  D = LLF[,2]
  D1 = unique(D)

  indices = rep(1, length(D1))
  #for each dimension, select the model with the largest likelihood
  if (length(D1)>2)
  {
    for (i in 1:length(D1))
    {
      a = c()
      for (j in 1:length(D))
      {
        if(D[[j]]==D1[[i]])
          a = c(a, LLF[j,1])
      }
      b = max(a)
      #indices[i] : first index of a model whose likelihood equals this maximum
      indices[i] = which.max(vec_bin(LLF[,1],b)[[1]])
    }
  }

  return (list(indices=indices,D1=D1))
}
#' @param X a vector
#' @param r a value that may appear in X
#' @return a list with Z (a binary vector of the same size as X) and indice, the indices where Z equals 1
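#' @examples
#' ## small illustration with arbitrary values: flag the entries equal to 2
#' vec_bin(c(1,2,2,3), 2) # Z = c(0,1,1,0), indice = c(2,3)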
vec_bin = function(X,r)
{
  Z = rep(0,length(X))
  indice = c()
  j = 1
  for (i in 1:length(X))
  {
    if(X[i] == r)
    {
      Z[i] = 1
      indice[j] = i
      j = j+1
    } else
      Z[i] = 0
  }
  return (list(Z=Z,indice=indice))
}