c7aa3c6a78bd2c0b033467b3fa317d00af0dd5a4
[valse.git] / pkg / R / generateSampleInputs.R
#' Generate a sample of (X,Y) of size n
#'
#' Every observation is first assigned to one of the k clusters, with
#' probabilities \code{pi}. Its covariates are then drawn from a multivariate
#' normal distribution with mean \code{meanX} and covariance \code{covX}, and
#' its response from a multivariate normal distribution whose mean is the
#' covariates times the regression matrix of the drawn cluster and whose
#' covariance is the \code{covY} slice of that cluster.
#'
#' @param meanX mean of the covariates (vector of size p), shared by all clusters
#' @param covX covariance for covariates (of size p*p)
#' @param covY covariance for the response vector (of size m*m*k)
#' @param pi proportion for each cluster (of size k)
#' @param beta regression matrix, of size p*m*k
#' @param n sample size
#'
#' @return list with X (n*p matrix), Y (n*m matrix) and class (n*1 matrix
#'   holding the cluster label drawn for each observation)
#' @export
generateXY <- function(meanX, covX, covY, pi, beta, n) {
  p <- dim(covX)[1]
  m <- dim(covY)[1]
  k <- dim(covY)[3]

  X <- matrix(nrow = n, ncol = p)
  Y <- matrix(nrow = n, ncol = m)
  # Kept as an n x 1 matrix (not a plain vector) so that the shape of the
  # returned `class` element is unchanged for existing callers.
  labels <- matrix(nrow = n)

  # seq_len() lets n = 0 return empty matrices instead of looping over c(1, 0).
  # The draws stay inside a per-observation loop so that the order in which
  # random numbers are consumed (label, then X, then Y) is preserved.
  for (i in seq_len(n)) {
    labels[i] <- sample.int(k, 1, prob = pi)
    # MASS is addressed through its namespace: nothing gets attached to the
    # search path, and a missing package fails with an explicit error.
    X[i, ] <- MASS::mvrnorm(1, meanX, covX)
    Y[i, ] <- MASS::mvrnorm(
      1,
      X[i, ] %*% beta[, , labels[i]],
      covY[, , labels[i]]
    )
  }

  list(X = X, Y = Y, class = labels)
}
31
#' Generate a sample of (X,Y) of size n with default values
#'
#' The covariates have zero mean and identity covariance, every cluster has
#' the same weight 1/k and an identity covariance for the response. For each
#' covariate, a random number (between 1 and m) of leading response
#' coefficients is filled with uniform random values in every cluster; the
#' remaining coefficients are left at zero.
#'
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with X and Y
#' @export
generateXYdefault <- function(n, p, m, k) {
  meanX <- rep(0, p)
  covX <- diag(p)
  # diag(m) is recycled along the third dimension: one identity per cluster.
  covY <- array(diag(m), dim = c(m, m, k))
  pi <- rep(1 / k, k)

  # For each covariate, draw how many leading coefficients are non-zero, then
  # fill them with uniform values (one column of draws per cluster).
  beta <- array(0, dim = c(p, m, k))
  for (j in seq_len(p)) {
    nonZeroCount <- sample.int(m, 1)
    beta[j, seq_len(nonZeroCount), ] <-
      matrix(runif(nonZeroCount * k), ncol = k)
  }

  sample_IO <- generateXY(meanX, covX, covY, pi, beta, n)
  # The cluster labels returned by generateXY are deliberately not exposed.
  list(X = sample_IO$X, Y = sample_IO$Y)
}
60
#' Initialize the parameters in a basic way (zero for the conditional mean,
#' uniform for weights, identity for covariance matrices, and uniformly
#' distributed for the clustering)
#'
#' @param n sample size
#' @param p number of covariates
#' @param m size of the response
#' @param k number of clusters
#' @return list with phiInit (p*m*k array of zeros), rhoInit (m*m*k array
#'   holding one identity matrix per cluster), piInit (vector of k equal
#'   weights) and gamInit (n*k matrix whose rows sum to one, with most of the
#'   weight on one cluster drawn uniformly at random for each row)
#' @export
basicInitParameters <- function(n, p, m, k) {
  phiInit <- array(0, dim = c(p, m, k))

  piInit <- rep(1 / k, k)

  # diag(m) is recycled along the third dimension: one identity per cluster.
  rhoInit <- array(diag(m), dim = c(m, m, k))

  # Start from a weight of 0.1 everywhere, then raise the entry of one
  # uniformly drawn cluster per observation to 0.9.
  gamInit <- matrix(0.1, nrow = n, ncol = k)
  drawn <- sample.int(k, n, replace = TRUE)
  gamInit[cbind(seq_len(n), drawn)] <- 0.9
  # Normalise each row by its own total. All rows share the same total, but
  # rowSums() makes that explicit and also works when n = 0 (no first row).
  gamInit <- gamInit / rowSums(gamInit)

  list(
    "phiInit" = phiInit,
    "rhoInit" = rhoInit,
    "piInit" = piInit,
    "gamInit" = gamInit
  )
}