remove pre-commit hook; fix weird formatting from formatR package

author Benjamin Auder <benjamin.auder@somewhere>

Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)

committer Benjamin Auder <benjamin.auder@somewhere>

Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)
author Benjamin Auder <benjamin.auder@somewhere>
Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)
committer Benjamin Auder <benjamin.auder@somewhere>
Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)
diff --git a/hooks/pre-commit b/hooks/pre-commit.bad

similarity index 100%

rename from hooks/pre-commit

rename to hooks/pre-commit.bad
diff --git a/pkg/R/EMGLLF.R b/pkg/R/EMGLLF.R

index ee7a4fc..0a279f0 100644 (file)
--- a/pkg/R/EMGLLF.R
+++ b/pkg/R/EMGLLF.R
@@ -24,14 +24,14 @@
  #' @export
  EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
    X, Y, eps, fast = TRUE)
-  {
+{
    if (!fast)
    {
      # Function in R
      return(.EMGLLF_R(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
        X, Y, eps))
    }
-  
+
    # Function in C
    n <- nrow(X)  #nombre d'echantillons
    p <- ncol(X)  #nombre de covariables
@@ -46,22 +46,21 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
  # R version - slow but easy to read
  .EMGLLF_R <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
    X2, Y, eps)
-  {
+{
    # Matrix dimensions
    n <- dim(Y)[1]
-  if (length(dim(phiInit)) == 2)
-  {
+  if (length(dim(phiInit)) == 2) {
      p <- 1
      m <- dim(phiInit)[1]
      k <- dim(phiInit)[2]
-  } else
-  {
+  } else {
      p <- dim(phiInit)[1]
      m <- dim(phiInit)[2]
      k <- dim(phiInit)[3]
    }
    X <- matrix(nrow = n, ncol = p)
    X[1:n, 1:p] <- X2
+
    # Outputs
    phi <- array(NA, dim = c(p, m, k))
    phi[1:p, , ] <- phiInit
@@ -69,7 +68,7 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    pi <- piInit
    llh <- -Inf
    S <- array(0, dim = c(p, m, k))
-  
+
    # Algorithm variables
    gam <- gamInit
    Gram2 <- array(0, dim = c(p, p, k))
@@ -77,33 +76,37 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
    X2 <- array(0, dim = c(n, p, k))
    Y2 <- array(0, dim = c(n, m, k))
    EPS <- 1e-15
-  
+
    for (ite in 1:maxi)
    {
      # Remember last pi,rho,phi values for exit condition in the end of loop
      Phi <- phi
      Rho <- rho
      Pi <- pi
-    
+
      # Computations associated to X and Y
      for (r in 1:k)
      {
-      for (mm in 1:m) Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
-      for (i in 1:n) X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
-      for (mm in 1:m) ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
+      for (mm in 1:m)
+        Y2[, mm, r] <- sqrt(gam[, r]) * Y[, mm]
+      for (i in 1:n)
+        X2[i, , r] <- sqrt(gam[i, r]) * X[i, ]
+      for (mm in 1:m)
+        ps2[, mm, r] <- crossprod(X2[, , r], Y2[, mm, r])
        for (j in 1:p)
        {
-        for (s in 1:p) Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
+        for (s in 1:p)
+          Gram2[j, s, r] <- crossprod(X2[, j, r], X2[, s, r])
        }
      }
-    
-    ######### M step #
-    
+
+    ## M step
+
      # For pi
      b <- sapply(1:k, function(r) sum(abs(phi[, , r])))
      gam2 <- colSums(gam)
      a <- sum(gam %*% log(pi))
-    
+
      # While the proportions are nonpositive
      kk <- 0
      pi2AllPositive <- FALSE
@@ -113,66 +116,64 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
        pi2AllPositive <- all(pi2 >= 0)
        kk <- kk + 1
      }
-    
+
      # t(m) is the largest value in the grid O.1^k such that it is nonincreasing
-    while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) < -sum(gam2 * log(pi2))/n + 
-      lambda * sum(pi2^gamma * b))
-      {
+    while (kk < 1000 && -a/n + lambda * sum(pi^gamma * b) <
+      -sum(gam2 * log(pi2))/n + lambda * sum(pi2^gamma * b))
+    {
        pi2 <- pi + 0.1^kk * (1/n * gam2 - pi)
        kk <- kk + 1
      }
      t <- 0.1^kk
      pi <- (pi + t * (pi2 - pi))/sum(pi + t * (pi2 - pi))
-    
+
      # For phi and rho
      for (r in 1:k)
      {
        for (mm in 1:m)
        {
          ps <- 0
-        for (i in 1:n) ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, 
-          r])
+        for (i in 1:n)
+          ps <- ps + Y2[i, mm, r] * sum(X2[i, , r] * phi[, mm, r])
          nY2 <- sum(Y2[, mm, r]^2)
          rho[mm, mm, r] <- (ps + sqrt(ps^2 + 4 * nY2 * gam2[r]))/(2 * nY2)
        }
      }
-    
+
      for (r in 1:k)
      {
        for (j in 1:p)
        {
          for (mm in 1:m)
          {
-          S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r] + sum(phi[-j, mm, 
-          r] * Gram2[j, -j, r])
-          if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma))
-          {
-          phi[j, mm, r] <- 0
-          } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma))
-          {
-          phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, 
-            j, r]
-          } else
-          {
-          phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, 
-            j, r]
+          S[j, mm, r] <- -rho[mm, mm, r] * ps2[j, mm, r]
+            + sum(phi[-j, mm, r] * Gram2[j, -j, r])
+          if (abs(S[j, mm, r]) <= n * lambda * (pi[r]^gamma)) {
+            phi[j, mm, r] <- 0
+          } else if (S[j, mm, r] > n * lambda * (pi[r]^gamma)) {
+            phi[j, mm, r] <- (n * lambda * (pi[r]^gamma) - S[j, mm, r])/Gram2[j, j, r]
+          } else {
+            phi[j, mm, r] <- -(n * lambda * (pi[r]^gamma) + S[j, mm, r])/Gram2[j, j, r]
            }
          }
        }
      }
-    
+
      ######## E step#
-    
+
      # Precompute det(rho[,,r]) for r in 1...k
      detRho <- sapply(1:k, function(r) det(rho[, , r]))
      gam1 <- matrix(0, nrow = n, ncol = k)
      for (i in 1:n)
      {
        # Update gam[,]
-      for (r in 1:k) gam1[i, r] <- pi[r] * exp(-0.5 * sum((Y[i, ] %*% rho[, 
-        , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r]
+      for (r in 1:k)
+      {
+        gam1[i, r] <- pi[r] * exp(-0.5
+          * sum((Y[i, ] %*% rho[, , r] - X[i, ] %*% phi[, , r])^2)) * detRho[r]
+      }
      }
-    gam <- gam1/rowSums(gam1)
+    gam <- gam1 / rowSums(gam1)
      sumLogLLH <- sum(log(rowSums(gam)) - log((2 * base::pi)^(m/2)))
      sumPen <- sum(pi^gamma * b)
      last_llh <- llh
@@ -182,10 +183,10 @@ EMGLLF <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda,
      Dist2 <- max((abs(rho - Rho))/(1 + abs(rho)))
      Dist3 <- max((abs(pi - Pi))/(1 + abs(Pi)))
      dist2 <- max(Dist1, Dist2, Dist3)
-    
+
      if (ite >= mini && (dist >= eps || dist2 >= sqrt(eps))) 
        break
    }
-  
+
    list(phi = phi, rho = rho, pi = pi, llh = llh, S = S)
  }
diff --git a/pkg/R/EMGrank.R b/pkg/R/EMGrank.R

index 436b289..db40948 100644 (file)
--- a/pkg/R/EMGrank.R
+++ b/pkg/R/EMGrank.R
@@ -1,4 +1,4 @@
-#' EMGrank 
+#' EMGrank
  #'
  #' Description de EMGrank
  #'
@@ -23,7 +23,7 @@ EMGrank <- function(Pi, Rho, mini, maxi, X, Y, tau, rank, fast = TRUE)
      # Function in R
      return(.EMGrank_R(Pi, Rho, mini, maxi, X, Y, tau, rank))
    }
-  
+
    # Function in C
    n <- nrow(X)  #nombre d'echantillons
    p <- ncol(X)  #nombre de covariables
@@ -50,12 +50,12 @@ matricize <- function(X)
    p <- dim(X)[2]
    m <- dim(Rho)[2]
    k <- dim(Rho)[3]
-  
+
    # init outputs
    phi <- array(0, dim = c(p, m, k))
    Z <- rep(1, n)
    LLF <- 0
-  
+
    # local variables
    Phi <- array(0, dim = c(p, m, k))
    deltaPhi <- c()
@@ -69,12 +69,12 @@ matricize <- function(X)
      # M step: update for Beta ( and then phi)
      for (r in 1:k)
      {
-      Z_indice <- seq_len(n)[Z == r]  #indices where Z == r
+      Z_indice <- seq_len(n)[Z == r] #indices where Z == r
        if (length(Z_indice) == 0) 
          next
        # U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr
-      s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ]))) %*% crossprod(matricize(X[Z_indice, 
-        ]), matricize(Y[Z_indice, ])))
+      s <- svd(MASS::ginv(crossprod(matricize(X[Z_indice, ])))
+        %*% crossprod(matricize(X[Z_indice, ]), matricize(Y[Z_indice, ])))
        S <- s$d
        # Set m-rank(r) singular values to zero, and recompose best rank(r) approximation
        # of the initial product
@@ -82,7 +82,7 @@ matricize <- function(X)
          S[(rank[r] + 1):length(S)] <- 0
        phi[, , r] <- s$u %*% diag(S) %*% t(s$v) %*% Rho[, , r]
      }
-    
+
      # Step E and computation of the loglikelihood
      sumLogLLF2 <- 0
      for (i in seq_len(n))
@@ -91,8 +91,7 @@ matricize <- function(X)
        maxLogGamIR <- -Inf
        for (r in seq_len(k))
        {
-        dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, 
-          , r])
+        dotProduct <- tcrossprod(Y[i, ] %*% Rho[, , r] - X[i, ] %*% phi[, , r])
          logGamIR <- log(Pi[r]) + log(det(Rho[, , r])) - 0.5 * dotProduct
          # Z[i] = index of max (gam[i,])
          if (logGamIR > maxLogGamIR)
@@ -104,15 +103,15 @@ matricize <- function(X)
        }
        sumLogLLF2 <- sumLogLLF2 + log(sumLLF1)
      }
-    
+
      LLF <- -1/n * sumLogLLF2
-    
+
      # update distance parameter to check algorithm convergence (delta(phi, Phi))
-    deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi))))  #TODO: explain?
+    deltaPhi <- c(deltaPhi, max((abs(phi - Phi))/(1 + abs(phi)))) #TODO: explain?
      if (length(deltaPhi) > deltaPhiBufferSize) 
        deltaPhi <- deltaPhi[2:length(deltaPhi)]
      sumDeltaPhi <- sum(abs(deltaPhi))
-    
+
      # update other local variables
      Phi <- phi
      ite <- ite + 1
diff --git a/pkg/R/computeGridLambda.R b/pkg/R/computeGridLambda.R

index 4b68bcd..c34c707 100644 (file)
--- a/pkg/R/computeGridLambda.R
+++ b/pkg/R/computeGridLambda.R
@@ -18,18 +18,19 @@
  #' @export
  computeGridLambda <- function(phiInit, rhoInit, piInit, gamInit, X, Y, gamma, mini, 
    maxi, tau, fast = TRUE)
-  {
+{
    n <- nrow(X)
    p <- dim(phiInit)[1]
    m <- dim(phiInit)[2]
    k <- dim(phiInit)[3]
-  
+
    list_EMG <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda = 0, 
      X, Y, tau, fast)
    grid <- array(0, dim = c(p, m, k))
    for (i in 1:p)
    {
-    for (j in 1:m) grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
+    for (j in 1:m)
+      grid[i, j, ] <- abs(list_EMG$S[i, j, ])/(n * list_EMG$pi^gamma)
    }
    sort(unique(grid))
  }
diff --git a/pkg/R/constructionModelesLassoMLE.R b/pkg/R/constructionModelesLassoMLE.R

index 760da40..4f23bb0 100644 (file)
--- a/pkg/R/constructionModelesLassoMLE.R
+++ b/pkg/R/constructionModelesLassoMLE.R
@@ -22,7 +22,7 @@
  #' @export
  constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini, 
    maxi, gamma, X, Y, eps, S, ncores = 3, fast = TRUE, verbose = FALSE)
-  {
+{
    if (ncores > 1)
    {
      cl <- parallel::makeCluster(ncores, outfile = "")
@@ -30,16 +30,16 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
        "rhoInit", "gamInit", "mini", "maxi", "gamma", "X", "Y", "eps", "S", 
        "ncores", "fast", "verbose"))
    }
-  
+
    # Individual model computation
    computeAtLambda <- function(lambda)
    {
      if (ncores > 1) 
        require("valse")  #nodes start with an empty environment
-    
+
      if (verbose) 
        print(paste("Computations for lambda=", lambda))
-    
+
      n <- dim(X)[1]
      p <- dim(phiInit)[1]
      m <- dim(phiInit)[2]
@@ -49,43 +49,45 @@ constructionModelesLassoMLE <- function(phiInit, rhoInit, piInit, gamInit, mini,
      col.sel <- which(sapply(sel.lambda, length) > 0)  #if list of selected vars
      if (length(col.sel) == 0) 
        return(NULL)
-    
+
      # lambda == 0 because we compute the EMV: no penalization here
      res <- EMGLLF(phiInit[col.sel, , ], rhoInit, piInit, gamInit, mini, maxi, 
        gamma, 0, X[, col.sel], Y, eps, fast)
-    
+
      # Eval dimension from the result + selected
      phiLambda2 <- res$phi
      rhoLambda <- res$rho
      piLambda <- res$pi
      phiLambda <- array(0, dim = c(p, m, k))
-    for (j in seq_along(col.sel)) phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, 
-      sel.lambda[[j]], ]
+    for (j in seq_along(col.sel))
+      phiLambda[col.sel[j], sel.lambda[[j]], ] <- phiLambda2[j, sel.lambda[[j]], ]
      dimension <- length(unlist(sel.lambda))
-    
+
      # Computation of the loglikelihood
      densite <- vector("double", n)
      for (r in 1:k)
      {
        if (length(col.sel) == 1)
        {
-        delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, 
-          , r])))
-      } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, 
-        , r]))
-      densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m * 
-        exp(-diag(tcrossprod(delta))/2)
+        delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% t(phiLambda[col.sel, , r])))
+      } else delta <- (Y %*% rhoLambda[, , r] - (X[, col.sel] %*% phiLambda[col.sel, , r]))
+      densite <- densite + piLambda[r] * det(rhoLambda[, , r])/(sqrt(2 * base::pi))^m
+        * exp(-diag(tcrossprod(delta))/2)
      }
      llhLambda <- c(sum(log(densite)), (dimension + m + 1) * k - 1)
      list(phi = phiLambda, rho = rhoLambda, pi = piLambda, llh = llhLambda)
    }
-  
+
    # For each lambda, computation of the parameters
-  out <- if (ncores > 1) 
-    parLapply(cl, 1:length(S), computeAtLambda) else lapply(1:length(S), computeAtLambda)
-  
+  out <-
+    if (ncores > 1) {
+      parLapply(cl, 1:length(S), computeAtLambda)
+    } else {
+      lapply(1:length(S), computeAtLambda)
+    }
+
    if (ncores > 1) 
      parallel::stopCluster(cl)
-  
+
    out
  }
diff --git a/pkg/R/constructionModelesLassoRank.R b/pkg/R/constructionModelesLassoRank.R

index b997303..5857a42 100644 (file)
--- a/pkg/R/constructionModelesLassoRank.R
+++ b/pkg/R/constructionModelesLassoRank.R
@@ -20,12 +20,12 @@
  #' @export
  constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min, rank.max, 
    ncores, fast = TRUE, verbose = FALSE)
-  {
+{
    n <- dim(X)[1]
    p <- dim(X)[2]
    m <- dim(Y)[2]
    L <- length(S)
-  
+
    # Possible interesting ranks
    deltaRank <- rank.max - rank.min + 1
    Size <- deltaRank^k
@@ -42,7 +42,7 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
        each = deltaRank^(k - r)), each = L)
    }
    RankLambda[, k + 1] <- rep(1:L, times = Size)
-  
+
    if (ncores > 1)
    {
      cl <- parallel::makeCluster(ncores, outfile = "")
@@ -50,23 +50,21 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
        "Pi", "Rho", "mini", "maxi", "X", "Y", "eps", "Rank", "m", "phi", "ncores", 
        "verbose"))
    }
-  
+
    computeAtLambda <- function(index)
    {
      lambdaIndex <- RankLambda[index, k + 1]
      rankIndex <- RankLambda[index, 1:k]
      if (ncores > 1) 
        require("valse")  #workers start with an empty environment
-    
+
      # 'relevant' will be the set of relevant columns
      selected <- S[[lambdaIndex]]$selected
      relevant <- c()
      for (j in 1:p)
      {
        if (length(selected[[j]]) > 0)
-      {
          relevant <- c(relevant, j)
-      }
      }
      if (max(rankIndex) < length(relevant))
      {
@@ -75,25 +73,23 @@ constructionModelesLassoRank <- function(S, k, mini, maxi, X, Y, eps, rank.min,
        {
          res <- EMGrank(S[[lambdaIndex]]$Pi, S[[lambdaIndex]]$Rho, mini, maxi, 
            X[, relevant], Y, eps, rankIndex, fast)
-        llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + 
-          m)))
+        llh <- c(res$LLF, sum(rankIndex * (length(relevant) - rankIndex + m)))
          phi[relevant, , ] <- res$phi
        }
        list(llh = llh, phi = phi, pi = S[[lambdaIndex]]$Pi, rho = S[[lambdaIndex]]$Rho)
      }
    }
-  
+
    # For each lambda in the grid we compute the estimators
-  out <- if (ncores > 1)
-  {
-    parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
-  } else
-  {
-    lapply(seq_len(length(S) * Size), computeAtLambda)
-  }
-  
+  out <-
+    if (ncores > 1) {
+      parLapply(cl, seq_len(length(S) * Size), computeAtLambda)
+    } else {
+      lapply(seq_len(length(S) * Size), computeAtLambda)
+    }
+
    if (ncores > 1) 
      parallel::stopCluster(cl)
-  
+
    out
  }
diff --git a/pkg/R/generateXY.R b/pkg/R/generateXY.R

index fe86045..064b54b 100644 (file)
--- a/pkg/R/generateXY.R
+++ b/pkg/R/generateXY.R
@@ -17,14 +17,14 @@ generateXY <- function(n, π, meanX, β, covX, covY)
    p <- dim(covX)[1]
    m <- dim(covY)[1]
    k <- dim(covY)[3]
-  
+
    X <- matrix(nrow = 0, ncol = p)
    Y <- matrix(nrow = 0, ncol = m)
-  
+
    # random generation of the size of each population in X~Y (unordered)
    sizePop <- rmultinom(1, n, π)
-  class <- c()  #map i in 1:n --> index of class in 1:k
-  
+  class <- c() #map i in 1:n --> index of class in 1:k
+
    for (i in 1:k)
    {
      class <- c(class, rep(i, sizePop[i]))
@@ -33,7 +33,7 @@ generateXY <- function(n, π, meanX, β, covX, covY)
      Y <- rbind(Y, t(apply(newBlockX, 1, function(row) MASS::mvrnorm(1, row %*% 
        β[, , i], covY[, , i]))))
    }
-  
+
    shuffle <- sample(n)
    list(X = X[shuffle, ], Y = Y[shuffle, ], class = class[shuffle])
  }
diff --git a/pkg/R/initSmallEM.R b/pkg/R/initSmallEM.R

index fafa2c4..d1ade1c 100644 (file)
--- a/pkg/R/initSmallEM.R
+++ b/pkg/R/initSmallEM.R
@@ -23,24 +23,22 @@ initSmallEM <- function(k, X, Y, fast = TRUE)
    piInit1 <- matrix(0, nIte, k)
    gamInit1 <- array(0, dim = c(n, k, nIte))
    LLFinit1 <- list()
-  
+
    # require(MASS) #Moore-Penrose generalized inverse of matrix
    for (repet in 1:nIte)
    {
      distance_clus <- dist(cbind(X, Y))
      tree_hier <- hclust(distance_clus)
      Zinit1[, repet] <- cutree(tree_hier, k)
-    
+
      for (r in 1:k)
      {
        Z <- Zinit1[, repet]
        Z_indice <- seq_len(n)[Z == r]  #renvoit les indices où Z==r
-      if (length(Z_indice) == 1)
-      {
+      if (length(Z_indice) == 1) {
          betaInit1[, , r, repet] <- MASS::ginv(crossprod(t(X[Z_indice, ]))) %*% 
            crossprod(t(X[Z_indice, ]), Y[Z_indice, ])
-      } else
-      {
+      } else {
          betaInit1[, , r, repet] <- MASS::ginv(crossprod(X[Z_indice, ])) %*% 
            crossprod(X[Z_indice, ], Y[Z_indice, ])
        }
@@ -49,26 +47,26 @@ initSmallEM <- function(k, X, Y, fast = TRUE)
        rhoInit1[, , r, repet] <- solve(sigmaInit1[, , r, repet])
        piInit1[repet, r] <- mean(Z == r)
      }
-    
+
      for (i in 1:n)
      {
        for (r in 1:k)
        {
-        dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet] - X[i, 
-          ] %*% phiInit1[, , r, repet])
-        Gam[i, r] <- piInit1[repet, r] * det(rhoInit1[, , r, repet]) * exp(-0.5 * 
-          dotProduct)
+        dotProduct <- tcrossprod(Y[i, ] %*% rhoInit1[, , r, repet]
+          - X[i, ] %*% phiInit1[, , r, repet])
+        Gam[i, r] <- piInit1[repet, r]
+          * det(rhoInit1[, , r, repet]) * exp(-0.5 * dotProduct)
        }
        sumGamI <- sum(Gam[i, ])
        gamInit1[i, , repet] <- Gam[i, ]/sumGamI
      }
-    
+
      miniInit <- 10
      maxiInit <- 11
-    
-    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, 
-      ], gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, 
-      Y, eps = 1e-04, fast)
+
+    init_EMG <- EMGLLF(phiInit1[, , , repet], rhoInit1[, , , repet], piInit1[repet, ],
+      gamInit1[, , repet], miniInit, maxiInit, gamma = 1, lambda = 0, X, Y,
+      eps = 1e-04, fast)
      LLFEessai <- init_EMG$LLF
      LLFinit1[repet] <- LLFEessai[length(LLFEessai)]
    }
@@ -77,6 +75,6 @@ initSmallEM <- function(k, X, Y, fast = TRUE)
    rhoInit <- rhoInit1[, , , b]
    piInit <- piInit1[b, ]
    gamInit <- gamInit1[, , b]
-  
+
    return(list(phiInit = phiInit, rhoInit = rhoInit, piInit = piInit, gamInit = gamInit))
  }
diff --git a/pkg/R/main.R b/pkg/R/main.R

index 3b9620d..64e0586 100644 (file)
--- a/pkg/R/main.R
+++ b/pkg/R/main.R
@@ -30,29 +30,28 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
    maxi = 50, eps = 1e-04, kmin = 2, kmax = 3, rank.min = 1, rank.max = 5, ncores_outer = 1, 
    ncores_inner = 1, thresh = 1e-08, size_coll_mod = 10, fast = TRUE, verbose = FALSE, 
    plot = TRUE)
-  {
+{
    p <- dim(X)[2]
    m <- dim(Y)[2]
    n <- dim(X)[1]
-  
+
    if (verbose) 
      print("main loop: over all k and all lambda")
-  
-  if (ncores_outer > 1)
-  {
+
+  if (ncores_outer > 1) {
      cl <- parallel::makeCluster(ncores_outer, outfile = "")
      parallel::clusterExport(cl = cl, envir = environment(), varlist = c("X", 
        "Y", "procedure", "selecMod", "gamma", "mini", "maxi", "eps", "kmin", 
        "kmax", "rank.min", "rank.max", "ncores_outer", "ncores_inner", "thresh", 
        "size_coll_mod", "verbose", "p", "m"))
    }
-  
+
    # Compute models with k components
    computeModels <- function(k)
    {
      if (ncores_outer > 1) 
        require("valse")  #nodes start with an empty environment
-    
+
      if (verbose) 
        print(paste("Parameters initialization for k =", k))
      # smallEM initializes parameters by k-means and regression model in each
@@ -63,25 +62,22 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
        X, Y, gamma, mini, maxi, eps, fast)
      if (length(grid_lambda) > size_coll_mod) 
        grid_lambda <- grid_lambda[seq(1, length(grid_lambda), length.out = size_coll_mod)]
-    
+
      if (verbose) 
        print("Compute relevant parameters")
      # select variables according to each regularization parameter from the grid:
      # S$selected corresponding to selected variables
      S <- selectVariables(P$phiInit, P$rhoInit, P$piInit, P$gamInit, mini, maxi, 
        gamma, grid_lambda, X, Y, thresh, eps, ncores_inner, fast)
-    
-    if (procedure == "LassoMLE")
-    {
+
+    if (procedure == "LassoMLE") {
        if (verbose) 
          print("run the procedure Lasso-MLE")
        # compute parameter estimations, with the Maximum Likelihood Estimator,
        # restricted on selected variables.
        models <- constructionModelesLassoMLE(P$phiInit, P$rhoInit, P$piInit, 
          P$gamInit, mini, maxi, gamma, X, Y, eps, S, ncores_inner, fast, verbose)
-      
-    } else
-    {
+    } else {
        if (verbose) 
          print("run the procedure Lasso-Rank")
        # compute parameter estimations, with the Low Rank Estimator, restricted on
@@ -93,19 +89,23 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
      models <- models[sapply(models, function(cell) !is.null(cell))]
      models
    }
-  
+
    # List (index k) of lists (index lambda) of models
-  models_list <- if (ncores_outer > 1) 
-    parLapply(cl, kmin:kmax, computeModels) else lapply(kmin:kmax, computeModels)
+  models_list <-
+    if (ncores_outer > 1) {
+      parLapply(cl, kmin:kmax, computeModels)
+    } else {
+      lapply(kmin:kmax, computeModels)
+    }
    if (ncores_outer > 1) 
      parallel::stopCluster(cl)
-  
+
    if (!requireNamespace("capushe", quietly = TRUE))
    {
      warning("'capushe' not available: returning all models")
      return(models_list)
    }
-  
+
    # Get summary 'tableauRecap' from models
    tableauRecap <- do.call(rbind, lapply(seq_along(models_list), function(i)
    {
@@ -118,41 +118,35 @@ valse <- function(X, Y, procedure = "LassoMLE", selecMod = "DDSE", gamma = 1, mi
      data.frame(model = paste(i, ".", seq_along(models), sep = ""), pen = sumPen/n, 
        complexity = sumPen, contrast = -LLH)
    }))
-  
-  print(tableauRecap)
    tableauRecap <- tableauRecap[which(tableauRecap[, 4] != Inf), ]
-  
+
    modSel <- capushe::capushe(tableauRecap, n)
    indModSel <- if (selecMod == "DDSE") 
      as.numeric(modSel@DDSE@model) else if (selecMod == "Djump") 
      as.numeric(modSel@Djump@model) else if (selecMod == "BIC") 
      modSel@BIC_capushe$model else if (selecMod == "AIC") 
      modSel@AIC_capushe$model
-  
+
    mod <- as.character(tableauRecap[indModSel, 1])
    listMod <- as.integer(unlist(strsplit(mod, "[.]")))
    modelSel <- models_list[[listMod[1]]][[listMod[2]]]
-  
+
    ## Affectations
    Gam <- matrix(0, ncol = length(modelSel$pi), nrow = n)
    for (i in 1:n)
    {
      for (r in 1:length(modelSel$pi))
      {
-      sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, 
-        , r])^2)
-      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, 
-        , r])
+      sqNorm2 <- sum((Y[i, ] %*% modelSel$rho[, , r] - X[i, ] %*% modelSel$phi[, , r])^2)
+      Gam[i, r] <- modelSel$pi[r] * exp(-0.5 * sqNorm2) * det(modelSel$rho[, , r])
      }
    }
    Gam <- Gam/rowSums(Gam)
    modelSel$affec <- apply(Gam, 1, which.max)
    modelSel$proba <- Gam
-  
+
    if (plot)
-  {
      print(plot_valse(X, Y, modelSel, n))
-  }
-  
+
    return(modelSel)
  }
diff --git a/pkg/R/plot_valse.R b/pkg/R/plot_valse.R

index 6207061..ec2302d 100644 (file)
--- a/pkg/R/plot_valse.R
+++ b/pkg/R/plot_valse.R
@@ -18,7 +18,7 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    require("ggplot2")
    require("reshape2")
    require("cowplot")
-  
+
    K <- length(model$pi)
    ## regression matrices
    gReg <- list()
@@ -27,51 +27,47 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
      Melt <- melt(t((model$phi[, , r])))
      gReg[[r]] <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + 
        geom_tile() + scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
-      midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", 
-      r))
+      midpoint = 0, space = "Lab") + ggtitle(paste("Regression matrices in cluster", r))
    }
    print(gReg)
-  
+
    ## Differences between two clusters
    if (comp)
    {
      if (is.na(k1) || is.na(k))
-    {
        print("k1 and k2 must be integers, representing the clusters you want to compare")
-    }
      Melt <- melt(t(model$phi[, , k1] - model$phi[, , k2]))
-    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + 
-      scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
-        space = "Lab") + ggtitle(paste("Difference between regression matrices in cluster", 
-      k1, "and", k2))
+    gDiff <- ggplot(data = Melt, aes(x = Var1, y = Var2, fill = value))
+      + geom_tile()
+      + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+        space = "Lab")
+      + ggtitle(paste("Difference between regression matrices in cluster", 
+        k1, "and", k2))
      print(gDiff)
-    
    }
-  
+
    ### Covariance matrices
    matCov <- matrix(NA, nrow = dim(model$rho[, , 1])[1], ncol = K)
    for (r in 1:K)
-  {
      matCov[, r] <- diag(model$rho[, , r])
-  }
    MeltCov <- melt(matCov)
-  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile() + 
-    scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
-      space = "Lab") + ggtitle("Covariance matrices")
+  gCov <- ggplot(data = MeltCov, aes(x = Var1, y = Var2, fill = value)) + geom_tile()
+    + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, 
+      space = "Lab")
+    + ggtitle("Covariance matrices")
    print(gCov)
-  
+
    ### Proportions
    gam2 <- matrix(NA, ncol = K, nrow = n)
    for (i in 1:n)
-  {
      gam2[i, ] <- c(model$proba[i, model$affec[i]], model$affec[i])
-  }
-  
-  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2)) + 
-    geom_boxplot() + theme(legend.position = "none") + background_grid(major = "xy", 
-    minor = "none")
+
+  bp <- ggplot(data.frame(gam2), aes(x = X2, y = X1, color = X2, group = X2))
+    + geom_boxplot()
+    + theme(legend.position = "none")
+    + background_grid(major = "xy", minor = "none")
    print(bp)
-  
+
    ### Mean in each cluster
    XY <- cbind(X, Y)
    XY_class <- list()
@@ -79,18 +75,15 @@ plot_valse <- function(X, Y, model, n, comp = FALSE, k1 = NA, k2 = NA)
    for (r in 1:K)
    {
      XY_class[[r]] <- XY[model$affec == r, ]
-    if (sum(model$affec == r) == 1)
-    {
+    if (sum(model$affec == r) == 1) {
        meanPerClass[, r] <- XY_class[[r]]
-    } else
-    {
+    } else {
        meanPerClass[, r] <- apply(XY_class[[r]], 2, mean)
      }
    }
-  data <- data.frame(mean = as.vector(meanPerClass), cluster = as.character(rep(1:K, 
-    each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
+  data <- data.frame(mean = as.vector(meanPerClass),
+    cluster = as.character(rep(1:K, each = dim(XY)[2])), time = rep(1:dim(XY)[2], K))
    g <- ggplot(data, aes(x = time, y = mean, group = cluster, color = cluster))
-  print(g + geom_line(aes(linetype = cluster, color = cluster)) + geom_point(aes(color = cluster)) + 
-    ggtitle("Mean per cluster"))
-  
+  print(g + geom_line(aes(linetype = cluster, color = cluster))
+    + geom_point(aes(color = cluster)) + ggtitle("Mean per cluster"))
  }
diff --git a/pkg/R/selectVariables.R b/pkg/R/selectVariables.R

index fe0688c..f717cae 100644 (file)
--- a/pkg/R/selectVariables.R
+++ b/pkg/R/selectVariables.R
@@ -24,35 +24,33 @@
  #'
  selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, 
    glambda, X, Y, thresh = 1e-08, eps, ncores = 3, fast = TRUE)
-  {
-  if (ncores > 1)
-  {
+{
+  if (ncores > 1) {
      cl <- parallel::makeCluster(ncores, outfile = "")
      parallel::clusterExport(cl = cl, varlist = c("phiInit", "rhoInit", "gamInit", 
        "mini", "maxi", "glambda", "X", "Y", "thresh", "eps"), envir = environment())
    }
-  
+
    # Computation for a fixed lambda
    computeCoefs <- function(lambda)
    {
      params <- EMGLLF(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma, lambda, 
        X, Y, eps, fast)
-    
+
      p <- dim(phiInit)[1]
      m <- dim(phiInit)[2]
-    
+
      # selectedVariables: list where element j contains vector of selected variables
      # in [1,m]
-    selectedVariables <- lapply(1:p, function(j)
-    {
+    selectedVariables <- lapply(1:p, function(j) {
        # from boolean matrix mxk of selected variables obtain the corresponding boolean
        # m-vector, and finally return the corresponding indices
        seq_len(m)[apply(abs(params$phi[j, , ]) > thresh, 1, any)]
      })
-    
+
      list(selected = selectedVariables, Rho = params$rho, Pi = params$pi)
    }
-  
+
    # For each lambda in the grid, we compute the coefficients
    out <- if (ncores > 1) 
      parLapply(cl, glambda, computeCoefs) else lapply(glambda, computeCoefs)
@@ -67,8 +65,6 @@ selectVariables <- function(phiInit, rhoInit, piInit, gamInit, mini, maxi, gamma
    ind_uniq <- which(!ind_dup)
    out2 <- list()
    for (l in 1:length(ind_uniq))
-  {
      out2[[l]] <- out[[ind_uniq[l]]]
-  }
    out2
  }
author	Benjamin Auder <benjamin.auder@somewhere>
	Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)
committer	Benjamin Auder <benjamin.auder@somewhere>
	Fri, 14 Apr 2017 16:43:47 +0000 (18:43 +0200)
hooks/pre-commit.bad	[moved from hooks/pre-commit with 100% similarity]	patch \| blob \| blame \| history
pkg/R/EMGLLF.R		patch \| blob \| blame \| history
pkg/R/EMGrank.R		patch \| blob \| blame \| history
pkg/R/computeGridLambda.R		patch \| blob \| blame \| history
pkg/R/constructionModelesLassoMLE.R		patch \| blob \| blame \| history
pkg/R/constructionModelesLassoRank.R		patch \| blob \| blame \| history
pkg/R/generateXY.R		patch \| blob \| blame \| history
pkg/R/initSmallEM.R		patch \| blob \| blame \| history
pkg/R/main.R		patch \| blob \| blame \| history
pkg/R/plot_valse.R		patch \| blob \| blame \| history
pkg/R/selectVariables.R		patch \| blob \| blame \| history