X-Git-Url: https://git.auder.net/?p=morpheus.git;a=blobdiff_plain;f=pkg%2FR%2FsampleIO.R;h=9419c32c12c7cca866251a0e2c8129677d230924;hp=5e458377dd1bd523ebf0c93ea2afe86b1f788c4b;hb=ab35f6102896a49e86e853262c0650faa2931638;hpb=cbd88fe5729bf206a784238a2637aa60e697fcdc diff --git a/pkg/R/sampleIO.R b/pkg/R/sampleIO.R index 5e45837..9419c32 100644 --- a/pkg/R/sampleIO.R +++ b/pkg/R/sampleIO.R @@ -4,8 +4,10 @@ #' into K groups of proportions p. Inside one group, the probability law P(Y=1) is #' described by the corresponding column parameter in the matrix β + intercept b. #' +#' @name generateSampleIO +#' #' @param n Number of individuals -#' @param p Vector of K-1 populations relative proportions (sum <= 1) +#' @param p Vector of K(-1) populations relative proportions (sum (<)= 1) #' @param β Vectors of model parameters for each population, of size dxK #' @param b Vector of intercept values (use rep(0,K) for no intercept) #' @param link Link type; "logit" or "probit" @@ -17,55 +19,57 @@ #' \item{index: the population index (in 1:K) for each row in X} #' } #' +#' @examples +#' # K = 3 so we give first two components of p: 0.3 and 0.3 (p[3] = 0.4) +#' io <- generateSampleIO(1000, c(.3,.3), +#' matrix(c(1,3,-1,1,2,1),ncol=3), c(.5,-1,0), "logit") +#' io$index[1] #number of the group of X[1,] and Y[1] (in 1...K) +#' #' @export -generateSampleIO = function(n, p, β, b, link) +generateSampleIO <- function(n, p, β, b, link) { - # Check arguments - tryCatch({n = as.integer(n)}, error=function(e) stop("Cannot convert n to integer")) - if (length(n) > 1) - warning("n is a vector but should be scalar: only first element used") - if (n <= 0) - stop("n: positive integer") - if (!is.matrix(β) || !is.numeric(β) || any(is.na(β))) - stop("β: real matrix, no NAs") - K = ncol(β) - if (!is.numeric(p) || length(p)!=K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1) - stop("p: positive vector of size K-1, no NA, sum<=1") - p <- c(p, 1-sum(p)) - if (!is.numeric(b) || length(b)!=K || any(is.na(b))) - stop("b: real vector of size K, no NA") + # Check arguments + tryCatch({n <- as.integer(n)}, error=function(e) stop("Cannot convert n to integer")) + if (length(n) > 1) + warning("n is a vector but should be scalar: only first element used") + if (n <= 0) + stop("n: positive integer") + if (!is.matrix(β) || !is.numeric(β) || any(is.na(β))) + stop("β: real matrix, no NAs") + K <- ncol(β) + if (!is.numeric(p) || length(p) 1) + stop("p: positive vector of size >= K-1, no NA, sum(<)=1") + if (length(p) == K-1) + p <- c(p, 1-sum(p)) + if (!is.numeric(b) || length(b)!=K || any(is.na(b))) + stop("b: real vector of size K, no NA") - #random generation of the size of each population in X~Y (unordered) - classes = rmultinom(1, n, p) + # Random generation of the size of each population in X~Y (unordered) + classes <- rmultinom(1, n, p) - d = nrow(β) - zero_mean = rep(0,d) - id_sigma = diag(rep(1,d)) - # Always consider an intercept (use b=0 for none) - d = d + 1 - β = rbind(β, b) - X = matrix(nrow=0, ncol=d) - Y = c() - index = c() - for (i in 1:ncol(β)) - { - index = c(index, rep(i, classes[i])) - newXblock = cbind( MASS::mvrnorm(classes[i], zero_mean, id_sigma), 1 ) - arg_link = newXblock%*%β[,i] - probas = - if (link == "logit") - { - e_arg_link = exp(arg_link) - e_arg_link / (1 + e_arg_link) - } - else #"probit" - pnorm(arg_link) - probas[is.nan(probas)] = 1 #overflow of exp(x) - X = rbind(X, newXblock) - Y = c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) ) - } - shuffle = sample(n) - # Returned X should not contain an intercept column (it's an argument of estimation - # methods) - list("X"=X[shuffle,-d], "Y"=Y[shuffle], "index"=index[shuffle]) + d <- nrow(β) + zero_mean <- rep(0,d) + id_sigma <- diag(rep(1,d)) + X <- matrix(nrow=0, ncol=d) + Y <- c() + index <- c() + for (i in 1:K) + { + index <- c(index, rep(i, classes[i])) + newXblock <- MASS::mvrnorm(classes[i], zero_mean, id_sigma) + arg_link <- newXblock %*% β[,i] + b[i] + probas <- + if (link == "logit") + { + e_arg_link = exp(arg_link) + e_arg_link / (1 + e_arg_link) + } + else #"probit" + pnorm(arg_link) + probas[is.nan(probas)] = 1 #overflow of exp(x) + X <- rbind(X, newXblock) + Y <- c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) ) + } + shuffle <- sample(n) + list("X"=X[shuffle,], "Y"=Y[shuffle], "index"=index[shuffle]) }