prepare wrappers EMGLLF.R --> EMGLLF.c
[valse.git] / pkg / R / generateSampleInputs.R
CommitLineData
#' Generate a sample of (X,Y) of size n
#'
#' For each observation a cluster label is drawn in 1..k with probabilities
#' \code{pi}, then X is drawn from a multivariate normal distribution, and
#' finally Y is drawn from a multivariate normal distribution centred on
#' X %*% beta[,,label] with covariance covY[,,label].
#'
#' @param meanX mean of the covariates: either a vector of length p shared by
#'   all clusters, or a p*k matrix with one column per cluster
#' @param covX covariance for covariates: either a p*p matrix shared by all
#'   clusters, or a p*p*k array with one slice per cluster
#' @param covY covariance for the response vector (of size m*m*K)
#' @param pi proportion for each cluster
#' @param beta regression matrix, of size p*m*k
#' @param n sample size
#'
#' @return list with X (n*p matrix), Y (n*m matrix) and class (n*1 matrix of
#'   cluster labels)
#' @export
generateXY = function(meanX, covX, covY, pi, beta, n)
{
	p = dim(covX)[1]
	m = dim(covY)[1]
	k = dim(covY)[3]

	# meanX and covX may be shared by every cluster or given cluster by cluster
	perClusterMean = is.matrix(meanX) && nrow(meanX) == p && ncol(meanX) == k
	perClusterCov = length(dim(covX)) == 3
	if (perClusterCov && dim(covX)[3] != k)
		stop("covX must have one p*p slice per cluster (as many as covY)", call.=FALSE)

	# ncol is given explicitly so that n = 0 yields empty matrices
	X = matrix(NA_real_, nrow=n, ncol=p)
	Y = matrix(NA_real_, nrow=n, ncol=m)
	labels = matrix(NA_integer_, nrow=n, ncol=1)

	# MASS::mvrnorm is called with its namespace instead of attaching MASS.
	# Draw order for each row is kept as: label, then X, then Y.
	for (i in seq_len(n))
	{
		r = sample(seq_len(k), 1, prob=pi)
		labels[i] = r

		mu = if (perClusterMean) meanX[,r] else meanX
		sigma = if (perClusterCov) matrix(covX[,,r], nrow=p, ncol=p) else covX
		X[i,] = MASS::mvrnorm(1, mu, sigma)

		# explicit reshape: beta[,,r] and covY[,,r] drop dimensions when p or m is 1
		betaR = matrix(beta[,,r], nrow=p, ncol=m)
		covYR = matrix(covY[,,r], nrow=m, ncol=m)
		Y[i,] = MASS::mvrnorm(1, drop(X[i,] %*% betaR), covYR)
	}

	return (list(X=X, Y=Y, class=labels))
}
33
#' Generate a sample of (X,Y) of size n with default values
#'
#' Builds the inputs of \code{generateXY} and forwards them: a p*k matrix of
#' covariate means drawn uniformly in [-100,100], identity covariance
#' matrices (k slices of size p*p for X and m*m for Y), equal cluster
#' proportions, and a p*m*k regression array in which each row has a random
#' number (between 1 and m) of leading columns filled with uniform values.
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with X and Y
#' @export
generateXYdefault = function(n, p, m, k)
{
	# covariate means: uniform on [-100,100], one column per cluster
	meanX = 100 * matrix(1 - 2*runif(p*k), ncol=k)

	# identity covariances, repeated once per cluster
	covX = array(diag(p), dim=c(p,p,k))
	covY = array(diag(m), dim=c(m,m,k))

	# equal weight for every cluster
	pi = rep(1/k, times=k)

	# each row of beta gets a random number of leading non-zero columns
	beta = array(0, dim=c(p,m,k))
	for (row in 1:p)
	{
		filled = sample(1:m, 1)
		beta[row,1:filled,] = runif(filled*k)
	}

	drawn = generateXY(meanX, covX, covY, pi, beta, n)
	# only X and Y are exposed
	return (drawn[c("X","Y")])
}
64
#' Initialize the parameters in a basic way (zero for the conditional mean, uniform for weights,
#' identity for covariance matrices, and uniformly distributed for the clustering)
#'
#' In gamInit every observation is assigned to one cluster drawn uniformly at
#' random; that cluster gets weight 0.9 and the others 0.1 before each row is
#' rescaled to sum to one. With n = 0 an empty n*k matrix is returned.
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with phiInit (p*m*k array), rhoInit (m*m*k array), piInit
#'   (vector of length k) and gamInit (n*k matrix)
#' @export
basicInitParameters = function(n,p,m,k)
{
	phiInit = array(0, dim=c(p,m,k))

	piInit = rep(1/k, k)

	# one identity matrix per cluster
	rhoInit = array(diag(m), dim=c(m,m,k))

	gamInit = matrix(0.1, nrow=n, ncol=k)
	if (n > 0)
	{
		# R[i] is the cluster drawn for observation i
		R = sample(seq_len(k), n, replace=TRUE)
		gamInit[cbind(seq_len(n), R)] = 0.9
		# every row has the same total, so the first row gives the normalizer
		gamInit = gamInit / sum(gamInit[1,])
	}

	return (list("phiInit" = phiInit, "rhoInit" = rhoInit, "piInit" = piInit, "gamInit" = gamInit))
}