X-Git-Url: https://git.auder.net/?p=morpheus.git;a=blobdiff_plain;f=pkg%2FR%2FsampleIO.R;h=9419c32c12c7cca866251a0e2c8129677d230924;hp=5e458377dd1bd523ebf0c93ea2afe86b1f788c4b;hb=ab35f6102896a49e86e853262c0650faa2931638;hpb=cbd88fe5729bf206a784238a2637aa60e697fcdc

diff --git a/pkg/R/sampleIO.R b/pkg/R/sampleIO.R
index 5e45837..9419c32 100644
--- a/pkg/R/sampleIO.R
+++ b/pkg/R/sampleIO.R
@@ -4,8 +4,10 @@
 #' into K groups of proportions p. Inside one group, the probability law P(Y=1) is
 #' described by the corresponding column parameter in the matrix Î² + intercept b.
 #'
+#' @name generateSampleIO
+#'
 #' @param n Number of individuals
-#' @param p Vector of K-1 populations relative proportions (sum <= 1)
+#' @param p Vector of K(-1) populations relative proportions (sum (<)= 1)
 #' @param Î² Vectors of model parameters for each population, of size dxK
 #' @param b Vector of intercept values (use rep(0,K) for no intercept)
 #' @param link Link type; "logit" or "probit"
@@ -17,55 +19,57 @@
 #'   \item{index: the population index (in 1:K) for each row in X}
 #' }
 #'
+#' @examples
+#' # K = 3 so we give first two components of p: 0.3 and 0.3 (p[3] = 0.4)
+#' io <- generateSampleIO(1000, c(.3,.3),
+#'   matrix(c(1,3,-1,1,2,1),ncol=3), c(.5,-1,0), "logit")
+#' io$index[1] #number of the group of X[1,] and Y[1] (in 1...K)
+#'
 #' @export
-generateSampleIO = function(n, p, Î², b, link)
+generateSampleIO <- function(n, p, Î², b, link)
 {
-	# Check arguments
-	tryCatch({n = as.integer(n)}, error=function(e) stop("Cannot convert n to integer"))
-	if (length(n) > 1)
-		warning("n is a vector but should be scalar: only first element used")
-	if (n <= 0)
-		stop("n: positive integer")
-	if (!is.matrix(Î²) || !is.numeric(Î²) || any(is.na(Î²)))
-		stop("Î²: real matrix, no NAs")
-	K = ncol(Î²)
-	if (!is.numeric(p) || length(p)!=K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
-		stop("p: positive vector of size K-1, no NA, sum<=1")
-	p <- c(p, 1-sum(p))
-	if (!is.numeric(b) || length(b)!=K || any(is.na(b)))
-		stop("b: real vector of size K, no NA")
+  # Check arguments
+  tryCatch({n <- as.integer(n)}, error=function(e) stop("Cannot convert n to integer"))
+  if (length(n) > 1)
+    warning("n is a vector but should be scalar: only first element used")
+  if (n <= 0)
+    stop("n: positive integer")
+  if (!is.matrix(Î²) || !is.numeric(Î²) || any(is.na(Î²)))
+    stop("Î²: real matrix, no NAs")
+  K <- ncol(Î²)
+  if (!is.numeric(p) || length(p)<K-1 || any(is.na(p)) || any(p<0) || sum(p) > 1)
+    stop("p: positive vector of size >= K-1, no NA, sum(<)=1")
+  if (length(p) == K-1)
+    p <- c(p, 1-sum(p))
+  if (!is.numeric(b) || length(b)!=K || any(is.na(b)))
+    stop("b: real vector of size K, no NA")
 
-	#random generation of the size of each population in X~Y (unordered)
-	classes = rmultinom(1, n, p)
+  # Random generation of the size of each population in X~Y (unordered)
+  classes <- rmultinom(1, n, p)
 
-	d = nrow(Î²)
-	zero_mean = rep(0,d)
-	id_sigma = diag(rep(1,d))
-	# Always consider an intercept (use b=0 for none)
-	d = d + 1
-	Î² = rbind(Î², b)
-	X = matrix(nrow=0, ncol=d)
-	Y = c()
-	index = c()
-	for (i in 1:ncol(Î²))
-	{
-		index = c(index, rep(i, classes[i]))
-		newXblock = cbind( MASS::mvrnorm(classes[i], zero_mean, id_sigma), 1 )
-		arg_link = newXblock%*%Î²[,i]
-		probas =
-			if (link == "logit")
-			{
-				e_arg_link = exp(arg_link)
-				e_arg_link / (1 + e_arg_link)
-			}
-			else #"probit"
-				pnorm(arg_link)
-		probas[is.nan(probas)] = 1 #overflow of exp(x)
-		X = rbind(X, newXblock)
-		Y = c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
-	}
-	shuffle = sample(n)
-	# Returned X should not contain an intercept column (it's an argument of estimation
-	# methods)
-	list("X"=X[shuffle,-d], "Y"=Y[shuffle], "index"=index[shuffle])
+  d <- nrow(Î²)
+  zero_mean <- rep(0,d)
+  id_sigma <- diag(rep(1,d))
+  X <- matrix(nrow=0, ncol=d)
+  Y <- c()
+  index <- c()
+  for (i in 1:K)
+  {
+    index <- c(index, rep(i, classes[i]))
+    newXblock <- MASS::mvrnorm(classes[i], zero_mean, id_sigma)
+    arg_link <- newXblock %*% Î²[,i] + b[i]
+    probas <-
+      if (link == "logit")
+      {
+        e_arg_link = exp(arg_link)
+        e_arg_link / (1 + e_arg_link)
+      }
+      else #"probit"
+        pnorm(arg_link)
+    probas[is.nan(probas)] = 1 #overflow of exp(x)
+    X <- rbind(X, newXblock)
+    Y <- c( Y, vapply(probas, function(p) (rbinom(1,1,p)), 1) )
+  }
+  shuffle <- sample(n)
+  list("X"=X[shuffle,], "Y"=Y[shuffle], "index"=index[shuffle])
 }