prepare EMGLLF / EMGrank wrappers, simplify folder generateTestData
[valse.git] / R / generateSampleInputs.R
CommitLineData
ef67d338
BA
#' Generate a sample of (X,Y) of size n
#'
#' Each observation is drawn independently: a cluster is sampled with
#' probabilities \code{pi}, the covariates are drawn from a multivariate normal
#' with that cluster's mean and covariance, and the response is drawn from a
#' multivariate normal centred at \code{X[i,] \%*\% beta[,,cluster]}.
#'
#' @param meanX matrix of group means for covariates (of size p*K)
#' @param covX covariance for covariates (of size p*p*K)
#' @param covY covariance for the response vector (of size m*m*K)
#' @param pi proportion for each cluster (used as sampling probabilities)
#' @param beta regression array (of size p*m*K), one p*m matrix per cluster
#' @param n sample size
#'
#' @return list with X (matrix of size n*p) and Y (matrix of size n*m)
#' @export
generateXY <- function(meanX, covX, covY, pi, beta, n) {
  p <- dim(covX)[1]
  m <- dim(covY)[1]
  k <- dim(covX)[3]

  X <- matrix(nrow = n, ncol = p)
  Y <- matrix(nrow = n, ncol = m)

  # seq_len() keeps the loop empty when n == 0 (1:0 would iterate over 1, 0).
  # Draws are made row by row, cluster first, so results under a fixed seed
  # match the historical ordering of random number consumption.
  for (i in seq_len(n)) {
    cluster <- sample.int(k, 1, prob = pi)
    # MASS::mvrnorm is called through its namespace: a missing package raises
    # an error at the call site and nothing is attached to the search path.
    X[i, ] <- MASS::mvrnorm(1, meanX[, cluster], covX[, , cluster])
    Y[i, ] <- MASS::mvrnorm(1, X[i, ] %*% beta[, , cluster], covY[, , cluster])
  }

  list(X = X, Y = Y)
}
30
#' Generate a sample of (X,Y) of size n with default values
#'
#' Builds default parameters and delegates to \code{generateXY}: covariate
#' means are uniform on [-100, 100], all covariance matrices are identities,
#' clusters are equally likely, and each row of the regression array has a
#' random number of leading non-zero coefficients drawn uniformly on [0, 1].
#'
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with X and Y
#' @export
generateXYdefault <- function(n, p, m, k) {
  rangeX <- 100
  meanX <- rangeX * matrix(1 - 2 * runif(p * k), ncol = k)

  # One identity matrix per cluster; diag(.) is recycled across the k slices.
  covX <- array(diag(p), dim = c(p, p, k))
  covY <- array(diag(m), dim = c(m, m, k))

  proportions <- rep(1 / k, k)

  # For each covariate, pick how many responses (between 1 and m) it affects,
  # then fill those leading coefficients with uniform random values for every
  # cluster; the remaining coefficients stay at zero.
  beta <- array(0, dim = c(p, m, k))
  for (j in seq_len(p)) {
    nonZeroCount <- sample.int(m, 1)
    beta[j, seq_len(nonZeroCount), ] <-
      matrix(runif(nonZeroCount * k), ncol = k)
  }

  # generateXY already returns list(X = ..., Y = ...).
  generateXY(meanX, covX, covY, proportions, beta, n)
}
61
#' Initialize the parameters in a basic way (zero for the conditional mean,
#' uniform for weights, identity for covariance matrices, and uniformly
#' distributed for the clustering)
#'
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with phiInit (zero array of size p*m*k), rhoInit (array of
#'   size m*m*k holding identity matrices), piInit (vector of length k with
#'   equal weights) and gamInit (matrix of size n*k whose rows sum to 1)
#' @export
basicInitParameters <- function(n, p, m, k) {
  phiInit <- array(0, dim = c(p, m, k))

  piInit <- rep(1 / k, k)

  # One identity matrix per cluster; diag(m) is recycled across the k slices.
  rhoInit <- array(diag(m), dim = c(m, m, k))

  # Every observation gets weight 0.9 on one uniformly drawn cluster and 0.1
  # on the others; a single matrix-index assignment sets all rows at once.
  gamInit <- matrix(0.1, nrow = n, ncol = k)
  assigned <- sample.int(k, n, replace = TRUE)
  gamInit[cbind(seq_len(n), assigned)] <- 0.9

  # All rows share the same total (one 0.9 and k-1 times 0.1), so the first
  # row's sum normalises the whole matrix. Skipped when there are no rows.
  if (n > 0) {
    gamInit <- gamInit / sum(gamInit[1, ])
  }

  list(
    "phiInit" = phiInit,
    "rhoInit" = rhoInit,
    "piInit" = piInit,
    "gamInit" = gamInit
  )
}