reorganize test folder: tests generation with MATLAB almost OK (but suboptimal method)
authorBenjamin Auder <benjamin.auder@somewhere>
Sat, 14 Jan 2017 02:53:34 +0000 (03:53 +0100)
committerBenjamin Auder <benjamin.auder@somewhere>
Sat, 14 Jan 2017 02:53:34 +0000 (03:53 +0100)
27 files changed:
man/TODO [deleted file]
src/.gitignore
src/test/.gitignore [new file with mode: 0644]
src/test/OLD_TEST_MATLAB/TODO [deleted file]
src/test/OLD_TEST_MATLAB/checkOutput.m [deleted file]
src/test/OLD_TEST_MATLAB/testConstructionModelesLassoMLE.m [deleted file]
src/test/OLD_TEST_MATLAB/testEMGLLF.m [deleted file]
src/test/OLD_TEST_MATLAB/testSelectiontotale.m [deleted file]
src/test/generate_test_data/MATLAB_test_helpers/EMGLLF.m [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/EMGrank.m [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/Octave.m [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/README [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/basicInitParameters.m [moved from src/test/OLD_TEST_MATLAB/basicInitParameters.m with 100% similarity]
src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoMLE.m [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoRank.m [new file with mode: 0644]
src/test/generate_test_data/MATLAB_test_helpers/covariance.m [moved from src/test/OLD_TEST_MATLAB/covariance.m with 100% similarity]
src/test/generate_test_data/MATLAB_test_helpers/generateIO.m [moved from src/test/OLD_TEST_MATLAB/generateIO.m with 100% similarity]
src/test/generate_test_data/MATLAB_test_helpers/generateIOdefault.m [moved from src/test/OLD_TEST_MATLAB/generateIOdefault.m with 100% similarity]
src/test/generate_test_data/MATLAB_test_helpers/selectiontotale.m [new file with mode: 0644]
src/test/generate_test_data/R_test_helpers/checkOutput.R [moved from src/test/TEST R/checkOutput.R with 100% similarity]
src/test/generate_test_data/R_test_helpers/covariance.R [moved from src/test/TEST R/covariance.R with 100% similarity]
src/test/generate_test_data/R_test_helpers/testEMGLLF.R [moved from src/test/TEST R/testEMGLLF.R with 100% similarity]
src/test/generate_test_data/generateRunSaveTest_EMGLLF.m [moved from src/test/OLD_TEST_MATLAB/generateRunSaveTest_EMGLLF.m with 98% similarity]
src/test/generate_test_data/generateRunSaveTest_EMGrank.m [new file with mode: 0644]
src/test/generate_test_data/generateRunSaveTest_constructionModelesLassoMLE.m [moved from src/test/OLD_TEST_MATLAB/generateRunSaveTest_constructionModelesLassoMLE.m with 98% similarity]
src/test/generate_test_data/generateRunSaveTest_constructionModelesLassoRank.m [new file with mode: 0644]
src/test/generate_test_data/generateRunSaveTest_selectiontotale.m [moved from src/test/OLD_TEST_MATLAB/generateRunSaveTest_selectiontotale.m with 98% similarity]

diff --git a/man/TODO b/man/TODO
deleted file mode 100644 (file)
index e69de29..0000000
index a8b0573..bdfbcfb 100644 (file)
@@ -1,6 +1,4 @@
 #ignore object files, library and test executables
 *.o
 *.so
-test.*
-!test.*.c
 vgcore.*
diff --git a/src/test/.gitignore b/src/test/.gitignore
new file mode 100644 (file)
index 0000000..05354ad
--- /dev/null
@@ -0,0 +1,3 @@
+test.*
+!test.*.c
+/data/
diff --git a/src/test/OLD_TEST_MATLAB/TODO b/src/test/OLD_TEST_MATLAB/TODO
deleted file mode 100644 (file)
index b3dcd6a..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-1) transformer les .m en .R : toute la partie "R-only" du package "valse"
-   est utilisable si besoin
-2) sauvegarder les résultats + entrées/sorties à partir du code R uniquement,
-   dans le dossier src/test/data (** sous forme de vecteurs, sep=" " **)
diff --git a/src/test/OLD_TEST_MATLAB/checkOutput.m b/src/test/OLD_TEST_MATLAB/checkOutput.m
deleted file mode 100644 (file)
index c56ed0b..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-function[]=checkOutput(varName, matrix, refMatrix, tol)
-
-       fprintf('Checking %s\n',varName);
-       maxError = max(max(max(max(abs(matrix - refMatrix)))));
-       if maxError >= tol
-               fprintf('    Inaccuracy: max(abs(error)) = %g >= %g\n',maxError,tol);
-       else
-               fprintf('    OK\n');
-       end
-
-end
diff --git a/src/test/OLD_TEST_MATLAB/testConstructionModelesLassoMLE.m b/src/test/OLD_TEST_MATLAB/testConstructionModelesLassoMLE.m
deleted file mode 100644 (file)
index 27c1208..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-function[] = testConstructionModelesLassoMLE()
-
-       testFolder = 'data/';
-       delimiter = '\n';
-
-       %get dimensions
-       dimensions = dlmread(strcat(testFolder,'dimensions'), delimiter);
-       n = dimensions(1);
-       p = dimensions(2);
-       m = dimensions(3);
-       k = dimensions(4);
-       L = dimensions(5);
-
-       %get all input arrays
-       phiInit = reshape(dlmread(strcat(testFolder,'phiInit'), delimiter), p, m, k);
-       rhoInit = reshape(dlmread(strcat(testFolder,'rhoInit'), delimiter), m, m, k);
-       piInit = transpose(dlmread(strcat(testFolder,'piInit'), delimiter));
-       gamInit = reshape(dlmread(strcat(testFolder,'gamInit'), delimiter), n, k);
-       mini = int64(dlmread(strcat(testFolder,'mini'), delimiter));
-       maxi = int64(dlmread(strcat(testFolder,'maxi'), delimiter));
-       gamma = dlmread(strcat(testFolder,'gamma'), delimiter);
-       glambda = dlmread(strcat(testFolder,'glambda'), delimiter);
-       X = reshape(dlmread(strcat(testFolder,'X'), delimiter), n, p);
-       Y = reshape(dlmread(strcat(testFolder,'Y'), delimiter), n, m);
-       seuil = dlmread(strcat(testFolder,'seuil'), delimiter);
-       tau = dlmread(strcat(testFolder,'tau'), delimiter);
-       A1 = int64(reshape(dlmread(strcat(testFolder,'A1'), delimiter), p, m+1, L));
-       A2 = int64(reshape(dlmread(strcat(testFolder,'A2'), delimiter), p, m+1, L));
-
-       %run constructionModelesLassoMLE.m
-       [phi,rho,pi,lvraisemblance] = constructionModelesLassoMLE(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau,A1,A2);
-
-       %get all stored outputs
-       ref_phi = reshape(dlmread(strcat(testFolder,'phi'), delimiter), p, m, k, L);
-       ref_rho = reshape(dlmread(strcat(testFolder,'rho'), delimiter), m, m, k, L);
-       ref_pi = reshape(dlmread(strcat(testFolder,'pi'), delimiter), k, L);
-       ref_lvraisemblance = reshape(dlmread(strcat(testFolder,'lvraisemblance'), delimiter), L, 2);
-
-       %check that output correspond to stored output
-       tol = 1e-5;
-       checkOutput('phi',phi,ref_phi,tol);
-       checkOutput('rho',rho,ref_rho,tol);
-       checkOutput('pi',pi,ref_pi,tol);
-       checkOutput('lvraisemblance',lvraisemblance,ref_lvraisemblance,tol);
-
-end
diff --git a/src/test/OLD_TEST_MATLAB/testEMGLLF.m b/src/test/OLD_TEST_MATLAB/testEMGLLF.m
deleted file mode 100644 (file)
index 0dd9db8..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-function[] = testEMGLLF()
-
-       testFolder = 'data/';
-       delimiter = '\n';
-
-       %get dimensions
-       dimensions = dlmread(strcat(testFolder,'dimensions'), delimiter);
-       n = dimensions(1);
-       p = dimensions(2);
-       m = dimensions(3);
-       k = dimensions(4);
-
-       %get all input arrays
-       phiInit = reshape(dlmread(strcat(testFolder,'phiInit'), delimiter), p, m, k);
-       rhoInit = reshape(dlmread(strcat(testFolder,'rhoInit'), delimiter), m, m, k);
-       piInit = transpose(dlmread(strcat(testFolder,'piInit'), delimiter));
-       gamInit = reshape(dlmread(strcat(testFolder,'gamInit'), delimiter), n, k);
-       mini = int64(dlmread(strcat(testFolder,'mini'), delimiter));
-       maxi = int64(dlmread(strcat(testFolder,'maxi'), delimiter));
-       gamma = dlmread(strcat(testFolder,'gamma'), delimiter);
-       lambda = dlmread(strcat(testFolder,'lambda'), delimiter);
-       X = reshape(dlmread(strcat(testFolder,'X'), delimiter), n, p);
-       Y = reshape(dlmread(strcat(testFolder,'Y'), delimiter), n, m);
-       tau = dlmread(strcat(testFolder,'tau'), delimiter);
-
-       %run EMGLLF.m
-       [phi,rho,pi,LLF,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau);
-
-       %get all stored outputs
-       ref_phi = reshape(dlmread(strcat(testFolder,'phi'), delimiter), p, m, k);
-       ref_rho = reshape(dlmread(strcat(testFolder,'rho'), delimiter), m, m, k);
-       ref_pi = dlmread(strcat(testFolder,'pi'), delimiter);
-       ref_LLF = dlmread(strcat(testFolder,'LLF'), delimiter);
-       ref_S = reshape(dlmread(strcat(testFolder,'S'), delimiter), p, m, k);
-
-       %check that output correspond to stored output
-       tol = 1e-5;
-       checkOutput('phi',phi,ref_phi,tol);
-       checkOutput('rho',rho,ref_rho,tol);
-       checkOutput('pi',pi,ref_pi,tol);
-       checkOutput('LLF',LLF,ref_LLF,tol);
-       checkOutput('S',S,ref_S,tol);
-
-end
diff --git a/src/test/OLD_TEST_MATLAB/testSelectiontotale.m b/src/test/OLD_TEST_MATLAB/testSelectiontotale.m
deleted file mode 100644 (file)
index 996017f..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-function[] = testSelectiontotale()
-
-       testFolder = 'data/';
-       delimiter = '\n';
-
-       %get dimensions
-       dimensions = dlmread(strcat(testFolder,'dimensions'), delimiter);
-       n = dimensions(1);
-       p = dimensions(2);
-       m = dimensions(3);
-       k = dimensions(4);
-       L = dimensions(5);
-
-       %get all input arrays
-       phiInit = reshape(dlmread(strcat(testFolder,'phiInit'), delimiter), p, m, k);
-       rhoInit = reshape(dlmread(strcat(testFolder,'rhoInit'), delimiter), m, m, k);
-       piInit = transpose(dlmread(strcat(testFolder,'piInit'), delimiter));
-       gamInit = reshape(dlmread(strcat(testFolder,'gamInit'), delimiter), n, k);
-       mini = int64(dlmread(strcat(testFolder,'mini'), delimiter));
-       maxi = int64(dlmread(strcat(testFolder,'maxi'), delimiter));
-       gamma = dlmread(strcat(testFolder,'gamma'), delimiter);
-       glambda = dlmread(strcat(testFolder,'glambda'), delimiter);
-       X = reshape(dlmread(strcat(testFolder,'X'), delimiter), n, p);
-       Y = reshape(dlmread(strcat(testFolder,'Y'), delimiter), n, m);
-       seuil = dlmread(strcat(testFolder,'seuil'), delimiter);
-       tau = dlmread(strcat(testFolder,'tau'), delimiter);
-
-       %run constructionModelesLassoMLE.m
-       [A1,A2,Rho,Pi] = selectiontotale(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau);
-
-       %get all stored outputs
-       ref_A1 = int64(reshape(dlmread(strcat(testFolder,'A1'), delimiter), p, m+1, L));
-       ref_A2 = int64(reshape(dlmread(strcat(testFolder,'A2'), delimiter), p, m+1, L));
-       ref_Rho = reshape(dlmread(strcat(testFolder,'Rho'), delimiter), m, m, k, L);
-       ref_Pi = reshape(dlmread(strcat(testFolder,'Pi'), delimiter), k, L);
-
-       %check that output correspond to stored output
-       tol = 1e-5;
-       checkOutput('A1',A1,ref_A1,tol);
-       checkOutput('A2',A2,ref_A2,tol);
-       checkOutput('Rho',Rho,ref_Rho,tol);
-       checkOutput('Pi',Pi,ref_Pi,tol);
-
-end
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/EMGLLF.m b/src/test/generate_test_data/MATLAB_test_helpers/EMGLLF.m
new file mode 100644 (file)
index 0000000..618ffba
--- /dev/null
@@ -0,0 +1,174 @@
+function[phi,rho,pi,LLF,S] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,lambda,X,Y,tau)\r
+\r
+       %Get matrices dimensions\r
+       PI = 4.0 * atan(1.0);\r
+       n = size(X, 1);\r
+       [p,m,k] = size(phiInit);\r
+\r
+       %Initialize outputs\r
+       phi = phiInit;\r
+       rho = rhoInit;\r
+       pi = piInit;\r
+       LLF = zeros(maxi,1);\r
+       S = zeros(p,m,k);\r
+\r
+       %Other local variables\r
+       %NOTE: variables order is always n,p,m,k\r
+       gam = gamInit;\r
+       Gram2 = zeros(p,p,k);\r
+       ps2 = zeros(p,m,k);\r
+       b = zeros(k,1);\r
+       pen = zeros(maxi,k);\r
+       X2 = zeros(n,p,k);\r
+       Y2 = zeros(n,m,k);\r
+       dist = 0;\r
+       dist2 = 0;\r
+       ite = 1;\r
+       pi2 = zeros(k,1);\r
+       ps = zeros(m,k);\r
+       nY2 = zeros(m,k);\r
+       ps1 = zeros(n,m,k);\r
+       nY21 = zeros(n,m,k);\r
+       Gam = zeros(n,k);\r
+       EPS = 1e-15;\r
+\r
+       while ite<=mini || (ite<=maxi && (dist>=tau || dist2>=sqrt(tau)))\r
+\r
+               Phi = phi;\r
+               Rho = rho;\r
+               Pi = pi;\r
+\r
+               %Calculs associés à Y et X\r
+               for r=1:k\r
+                       for mm=1:m\r
+                               Y2(:,mm,r) = sqrt(gam(:,r)) .* Y(:,mm);\r
+                       end\r
+                       for i=1:n\r
+                               X2(i,:,r) = X(i,:) .* sqrt(gam(i,r));\r
+                       end\r
+                       for mm=1:m\r
+                               ps2(:,mm,r) = transpose(X2(:,:,r)) * Y2(:,mm,r);\r
+                       end\r
+                       for j=1:p\r
+                               for s=1:p\r
+                                       Gram2(j,s,r) = dot(X2(:,j,r), X2(:,s,r));\r
+                               end\r
+                       end\r
+               end\r
+\r
+               %%%%%%%%%%\r
+               %Etape M %\r
+               %%%%%%%%%%\r
+\r
+               %Pour pi\r
+               for r=1:k\r
+                       b(r) = sum(sum(abs(phi(:,:,r))));\r
+               end\r
+               gam2 = sum(gam,1);\r
+               a = sum(gam*transpose(log(pi)));\r
+\r
+               %tant que les proportions sont negatives\r
+               kk = 0;\r
+               pi2AllPositive = false;\r
+               while ~pi2AllPositive\r
+                       pi2 = pi + 0.1^kk * ((1/n)*gam2 - pi);\r
+                       pi2AllPositive = true;\r
+                       for r=1:k\r
+                               if pi2(r) < 0\r
+                                       pi2AllPositive = false;\r
+                                       break;\r
+                               end\r
+                       end\r
+                       kk = kk+1;\r
+               end\r
+\r
+               %t(m) la plus grande valeur dans la grille O.1^k tel que ce soit\r
+               %décroissante ou constante\r
+               while (-1/n*a+lambda*((pi.^gamma)*b))<(-1/n*gam2*transpose(log(pi2))+lambda.*(pi2.^gamma)*b) && kk<1000\r
+                       pi2 = pi+0.1^kk*(1/n*gam2-pi);\r
+                       kk = kk+1;\r
+               end\r
+               t = 0.1^(kk);\r
+               pi = (pi+t*(pi2-pi)) / sum(pi+t*(pi2-pi));\r
+\r
+               %Pour phi et rho\r
+               for r=1:k\r
+                       for mm=1:m\r
+                               for i=1:n\r
+                                       ps1(i,mm,r) = Y2(i,mm,r) * dot(X2(i,:,r), phi(:,mm,r));\r
+                                       nY21(i,mm,r) = (Y2(i,mm,r))^2;\r
+                               end\r
+                               ps(mm,r) = sum(ps1(:,mm,r));\r
+                               nY2(mm,r) = sum(nY21(:,mm,r));\r
+                               rho(mm,mm,r) = ((ps(mm,r)+sqrt(ps(mm,r)^2+4*nY2(mm,r)*(gam2(r))))/(2*nY2(mm,r)));\r
+                       end\r
+               end\r
+               for r=1:k\r
+                       for j=1:p\r
+                               for mm=1:m\r
+                                       S(j,mm,r) = -rho(mm,mm,r)*ps2(j,mm,r) + dot(phi(1:j-1,mm,r),Gram2(j,1:j-1,r)')...\r
+                                               + dot(phi(j+1:p,mm,r),Gram2(j,j+1:p,r)');\r
+                                       if abs(S(j,mm,r)) <= n*lambda*(pi(r)^gamma)\r
+                                               phi(j,mm,r)=0;\r
+                                       else\r
+                                               if S(j,mm,r)> n*lambda*(pi(r)^gamma)\r
+                                                       phi(j,mm,r)=(n*lambda*(pi(r)^gamma)-S(j,mm,r))/Gram2(j,j,r);\r
+                                               else\r
+                                                       phi(j,mm,r)=-(n*lambda*(pi(r)^gamma)+S(j,mm,r))/Gram2(j,j,r);\r
+                                               end\r
+                                       end\r
+                               end\r
+                       end\r
+               end\r
+\r
+               %%%%%%%%%%\r
+               %Etape E %\r
+               %%%%%%%%%%\r
+\r
+               sumLogLLF2 = 0.0;\r
+               for i=1:n\r
+                       %precompute dot products to numerically adjust their values\r
+                       dotProducts = zeros(k,1);\r
+                       for r=1:k\r
+                               dotProducts(r)= (Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*rho(:,:,r)-X(i,:)*phi(:,:,r));\r
+                       end\r
+                       shift = 0.5*min(dotProducts);\r
+\r
+                       %compute Gam(:,:) using shift determined above\r
+                       sumLLF1 = 0.0;\r
+                       for r=1:k\r
+                               Gam(i,r) = pi(r)*det(rho(:,:,r))*exp(-0.5*dotProducts(r) + shift);\r
+                               sumLLF1 = sumLLF1 + Gam(i,r)/(2*PI)^(m/2);\r
+                       end\r
+                       sumLogLLF2 = sumLogLLF2 + log(sumLLF1);\r
+                       sumGamI = sum(Gam(i,:));\r
+                       if sumGamI > EPS\r
+                               gam(i,:) = Gam(i,:) / sumGamI;\r
+                       else\r
+                               gam(i,:) = zeros(k,1);\r
+                       end\r
+               end\r
+\r
+               sumPen = 0.0;\r
+               for r=1:k\r
+                       sumPen = sumPen + pi(r).^gamma .* b(r);\r
+               end\r
+               LLF(ite) = -(1/n)*sumLogLLF2 + lambda*sumPen;\r
+\r
+               if ite == 1\r
+                       dist = LLF(ite);\r
+               else\r
+                       dist = (LLF(ite)-LLF(ite-1))/(1+abs(LLF(ite)));\r
+               end\r
+\r
+               Dist1=max(max(max((abs(phi-Phi))./(1+abs(phi)))));\r
+               Dist2=max(max(max((abs(rho-Rho))./(1+abs(rho)))));\r
+               Dist3=max(max((abs(pi-Pi))./(1+abs(Pi))));\r
+               dist2=max([Dist1,Dist2,Dist3]);\r
+\r
+               ite=ite+1;\r
+       end\r
+\r
+       pi = transpose(pi);\r
+\r
+end\r
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/EMGrank.m b/src/test/generate_test_data/MATLAB_test_helpers/EMGrank.m
new file mode 100644 (file)
index 0000000..76074b7
--- /dev/null
@@ -0,0 +1,69 @@
+function[phi,LLF] = EMGrank(Pi,Rho,mini,maxi,X,Y,tau,rank)\r
+\r
+       % get matrix sizes\r
+       [~,m,k] = size(Rho);\r
+       [n,p] = size(X);\r
+\r
+       % allocate output matrices\r
+       phi = zeros(p,m,k);\r
+       Z = ones(n,1,'int64');\r
+       LLF = 0.0;\r
+\r
+       % local variables\r
+       Phi = zeros(p,m,k);\r
+       deltaPhi = 0.0;\r
+       deltaPhi = [];\r
+       sumDeltaPhi = 0.0;\r
+       deltaPhiBufferSize = 20;\r
+\r
+       %main loop (at least mini iterations)\r
+       ite = int64(1);\r
+       while ite<=mini || (ite<=maxi && sumDeltaPhi>tau)\r
+\r
+               %M step: Mise à jour de Beta (et donc phi)\r
+               for r=1:k\r
+                       if (sum(Z==r) == 0)\r
+                               continue;\r
+                       end\r
+                       %U,S,V = SVD of (t(Xr)Xr)^{-1} * t(Xr) * Yr\r
+                       [U,S,V] = svd(pinv(transpose(X(Z==r,:))*X(Z==r,:))*transpose(X(Z==r,:))*Y(Z==r,:));\r
+                       %Set m-rank(r) singular values to zero, and recompose\r
+                       %best rank(r) approximation of the initial product\r
+                       S(rank(r)+1:end,:) = 0;\r
+                       phi(:,:,r) = U * S * transpose(V) * Rho(:,:,r);\r
+               end\r
+\r
+               %Etape E et calcul de LLF\r
+               sumLogLLF2 = 0.0;\r
+               for i=1:n\r
+                       sumLLF1 = 0.0;\r
+                       maxLogGamIR = -Inf;\r
+                       for r=1:k\r
+                               dotProduct = (Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r)) * transpose(Y(i,:)*Rho(:,:,r)-X(i,:)*phi(:,:,r));\r
+                               logGamIR = log(Pi(r)) + log(det(Rho(:,:,r))) - 0.5*dotProduct;\r
+                               %Z(i) = index of max (gam(i,:))\r
+                               if logGamIR > maxLogGamIR\r
+                                       Z(i) = r;\r
+                                       maxLogGamIR = logGamIR;\r
+                               end\r
+                               sumLLF1 = sumLLF1 + exp(logGamIR) / (2*pi)^(m/2);\r
+                       end\r
+                       sumLogLLF2 = sumLogLLF2 + log(sumLLF1);\r
+               end\r
+\r
+               LLF = -1/n * sumLogLLF2;\r
+\r
+               % update distance parameter to check algorithm convergence (delta(phi, Phi))\r
+               deltaPhi = [ deltaPhi, max(max(max((abs(phi-Phi))./(1+abs(phi))))) ];\r
+               if length(deltaPhi) > deltaPhiBufferSize\r
+                       deltaPhi = deltaPhi(2:length(deltaPhi));\r
+               end\r
+               sumDeltaPhi = sum(abs(deltaPhi));\r
+\r
+               % update other local variables\r
+               Phi = phi;\r
+               ite = ite+1;\r
+\r
+       end\r
+\r
+end\r
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/Octave.m b/src/test/generate_test_data/MATLAB_test_helpers/Octave.m
new file mode 100644 (file)
index 0000000..8f105d2
--- /dev/null
@@ -0,0 +1,5 @@
+% Run this file if using Octave
+if exist('OCTAVE_VERSION', 'builtin') ~= 0
+       pkg load statistics %for random()
+       more off %to prevent bufferizing output
+end
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/README b/src/test/generate_test_data/MATLAB_test_helpers/README
new file mode 100644 (file)
index 0000000..e94c0a0
--- /dev/null
@@ -0,0 +1 @@
+Subset of select/ project: only files required to generate tests outputs
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoMLE.m b/src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoMLE.m
new file mode 100644 (file)
index 0000000..5b7c65e
--- /dev/null
@@ -0,0 +1,58 @@
+function[phi,rho,pi,lvraisemblance] = constructionModelesLassoMLE(...
+       phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau,A1,A2)
+
+       PI = 4.0 * atan(1.0);
+
+       %get matrix sizes
+       n = size(X, 1);
+       [p,m,k] = size(phiInit);
+       L = length(glambda);
+
+       %output parameters
+       phi = zeros(p,m,k,L);
+       rho = zeros(m,m,k,L);
+       pi = zeros(k,L);
+       lvraisemblance = zeros(L,2);
+
+       for lambdaIndex=1:L
+               % Procedure Lasso-MLE
+               a = A1(:,1,lambdaIndex);
+               a(a==0) = [];
+               if length(a) == 0
+                       continue;
+               end
+               [phiLambda,rhoLambda,piLambda,~,~] = EMGLLF(...
+                       phiInit(a,:,:),rhoInit,piInit,gamInit,mini,maxi,gamma,0,X(:,a),Y,tau);
+
+               for j=1:length(a)
+                       phi(a(j),:,:,lambdaIndex) = phiLambda(j,:,:);
+               end
+               rho(:,:,:,lambdaIndex) = rhoLambda;
+               pi(:,lambdaIndex) = piLambda;
+
+               dimension = 0;
+               for j=1:p
+                       b = A2(j,2:end,lambdaIndex);
+                       b(b==0) = [];
+                       if length(b) > 0
+                               phi(A2(j,1,lambdaIndex),b,:,lambdaIndex) = 0.0;
+                       end
+                       c = A1(j,2:end,lambdaIndex);
+                       c(c==0) = [];
+                       dimension = dimension + length(c);
+               end
+
+               %on veut calculer l'EMV avec toutes nos estimations
+               densite = zeros(n,L);
+               for i=1:n
+                       for r=1:k
+                               delta = Y(i,:)*rho(:,:,r,lambdaIndex) - (X(i,a)*(phi(a,:,r,lambdaIndex)));
+                               densite(i,lambdaIndex) = densite(i,lambdaIndex) +...
+                                       pi(r,lambdaIndex)*det(rho(:,:,r,lambdaIndex))/(sqrt(2*PI))^m*exp(-dot(delta,delta)/2.0);
+                       end
+               end
+               lvraisemblance(lambdaIndex,1) = sum(log(densite(:,lambdaIndex)));
+               lvraisemblance(lambdaIndex,2) = (dimension+m+1)*k-1;
+       end
+
+end
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoRank.m b/src/test/generate_test_data/MATLAB_test_helpers/constructionModelesLassoRank.m
new file mode 100644 (file)
index 0000000..415ab12
--- /dev/null
@@ -0,0 +1,40 @@
+function[phi,lvraisemblance] = constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax)
+
+       PI = 4.0 * atan(1.0);
+
+       %get matrix sizes
+       [n,p] = size(X);
+       [~,m,k,~] = size(Rho);
+       L = size(A1, 2); %A1 est p x m+1 x L ou p x L ?!
+
+       %On cherche les rangs possiblement intéressants
+       deltaRank = rangmax - rangmin + 1;
+       Size = deltaRank^k;
+       Rank = zeros(Size,k,'int64');
+       for r=1:k
+               %On veut le tableau de toutes les combinaisons de rangs possibles
+               %Dans la première colonne : on répète (rangmax-rangmin)^(k-1) chaque chiffre : ca remplit la colonne
+               %Dans la deuxieme : on répète (rangmax-rangmin)^(k-2) chaque chiffre, et on fait ca (rangmax-rangmin)^2 fois
+               %...
+               %Dans la dernière, on répète chaque chiffre une fois, et on fait ca (rangmin-rangmax)^(k-1) fois.
+               Rank(:,r) = rangmin + reshape(repmat(0:(deltaRank-1), deltaRank^(k-r), deltaRank^(r-1)), Size, 1);
+       end
+
+       %output parameters
+       phi = zeros(p,m,k,L*Size);
+       lvraisemblance = zeros(L*Size,2);
+       for lambdaIndex=1:L
+               %On ne garde que les colonnes actives
+               %active sera l'ensemble des variables informatives
+               active = A1(:,lambdaIndex);
+               active(active==0) = [];
+               if length(active) > 0
+                       for j=1:Size
+                               [phiLambda,LLF] = EMGrank(Pi(:,lambdaIndex),Rho(:,:,:,lambdaIndex),mini,maxi,X(:,active),Y,tau,Rank(j,:));
+                               lvraisemblance((lambdaIndex-1)*Size+j,:) = [LLF, sum(Rank(j,:) .* (length(active)-Rank(j,:)+m))];
+                               phi(active,:,:,(lambdaIndex-1)*Size+j) = phiLambda;
+                       end
+               end
+       end
+
+end
diff --git a/src/test/generate_test_data/MATLAB_test_helpers/selectiontotale.m b/src/test/generate_test_data/MATLAB_test_helpers/selectiontotale.m
new file mode 100644 (file)
index 0000000..5d36a96
--- /dev/null
@@ -0,0 +1,54 @@
+function[A1,A2,Rho,Pi] = selectiontotale(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda,X,Y,seuil,tau)
+
+       [p,m,k] = size(phiInit);
+       L = length(glambda);
+       A1 = zeros(p,m+1,L,'int64');
+       A2 = zeros(p,m+1,L,'int64');
+       Rho = zeros(m,m,k,L);
+       Pi = zeros(k,L);
+
+       %Pour chaque lambda de la grille, on calcule les coefficients
+       for lambdaIndex=1:L
+               [phi,rho,pi,~,~] = EMGLLF(phiInit,rhoInit,piInit,gamInit,mini,maxi,gamma,glambda(lambdaIndex),X,Y,tau);
+
+               %Si un des coefficients est supérieur au seuil, on garde cette variable
+               selectedVariables = zeros(p,m);
+               discardedVariables = zeros(p,m);
+               atLeastOneSelectedVariable = false;
+               for j=1:p
+                       cpt=1;
+                       cpt2=1;
+                       for mm=1:m
+                               if max(abs(phi(j,mm,:))) > seuil
+                                       selectedVariables(j,cpt) = mm;
+                                       cpt = cpt+1;
+                                       atLeastOneSelectedVariable = true;
+                               else
+                                       discardedVariables(j,cpt2) = mm;
+                                       cpt2 = cpt2+1;
+                               end
+                       end
+               end
+
+               %Si aucun des coefficients n'a été gardé on renvoit la matrice nulle
+               %Et si on enlevait ces colonnes de zéro ??? Indices des colonnes vides
+               if atLeastOneSelectedVariable
+                       vec = [];
+                       for j=1:p
+                               if selectedVariables(j,1) ~= 0
+                                       vec = [vec;j];
+                               end
+                       end
+
+                       %Sinon on renvoit les numéros des coefficients utiles
+                       A1(:,1,lambdaIndex) = [vec;zeros(p-length(vec),1)];
+                       A1(1:length(vec),2:m+1,lambdaIndex) = selectedVariables(vec,:);
+                       A2(:,1,lambdaIndex) = 1:p;
+                       A2(:,2:m+1,lambdaIndex) = discardedVariables;
+                       Rho(:,:,:,lambdaIndex) = rho;
+                       Pi(:,lambdaIndex) = pi;
+               end
+
+       end
+
+end
@@ -19,7 +19,7 @@ function[] = generateRunSaveTest_EMGLLF(n, p, m, k, mini, maxi, gamma, lambda, v
 
        testFolder = 'data/';
        mkdir(testFolder);
-       delimiter = '\n';
+       delimiter = ' ';
 
        %save inputs
        dlmwrite(strcat(testFolder,'phiInit'), reshape(phiInit,1,[]), delimiter);
diff --git a/src/test/generate_test_data/generateRunSaveTest_EMGrank.m b/src/test/generate_test_data/generateRunSaveTest_EMGrank.m
new file mode 100644 (file)
index 0000000..2301aa5
--- /dev/null
@@ -0,0 +1,45 @@
+function[] = generateRunSaveTest_EMGrank(n, p, m, k, mini, maxi, gamma, rank, varargin)
+
+       %set defaults for optional inputs
+       optargs = {200 15 10 3 5 10 1.0 1:3};
+       %replace defaults by user parameters
+       optargs(1:length(varargin)) = varargin;
+       [n, p, m, k, mini, maxi, gamma, rank] = optargs{:};
+       mini = int64(mini);
+       maxi = int64(maxi);
+       rank = int64(rank);
+       tau = 1e-6;
+
+       Pi = (1.0/k)*ones(1,k);
+       Rho = zeros(m,m,k);
+       for r=1:k
+               Rho(:,:,r) = eye(m);
+       end
+
+       %Generate X and Y
+       [X, Y, ~] = generateIOdefault(n, p, m, k);
+
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+       testFolder = 'data/';
+       mkdir(testFolder);
+       delimiter = ' ';
+
+       %save inputs
+       dlmwrite(strcat(testFolder,'Rho'), reshape(Rho,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'Pi'), Pi, delimiter);
+       dlmwrite(strcat(testFolder,'mini'), mini, delimiter);
+       dlmwrite(strcat(testFolder,'maxi'), maxi, delimiter);
+       dlmwrite(strcat(testFolder,'X'), reshape(X,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'Y'), reshape(Y,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'tau'), tau, delimiter);
+       dlmwrite(strcat(testFolder,'rank'), rank, delimiter);
+       dlmwrite(strcat(testFolder,'dimensions'), [n,p,m,k], delimiter);;
+
+       [phi,LLF] = EMGrank(Pi,Rho,mini,maxi,X,Y,tau,rank);
+
+       %save output
+       dlmwrite(strcat(testFolder,'phi'), reshape(phi,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'LLF'), LLF, delimiter);
+
+end
@@ -1,5 +1,5 @@
 function[] = generateRunSaveTest_constructionModelesLassoMLE(n, p, m, k, mini, maxi, gamma, glambda, varargin)
-       
+
        %set defaults for optional inputs
        optargs = {200 15 10 3 5 10 1.0 [0.0,0.01,0.02,0.03,0.05,0.1,0.2,0.3,0.5,0.7,0.85,0.99]};
        %replace defaults by user parameters
@@ -27,12 +27,12 @@ function[] = generateRunSaveTest_constructionModelesLassoMLE(n, p, m, k, mini, m
         A1(:,1,i) = 1:p;
         A1(1:5,2,i) = 1:5;
     end
-    
+
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
        testFolder = 'data/';
        mkdir(testFolder);
-       delimiter = '\n';
+       delimiter = ' ';
 
        %save inputs
        dlmwrite(strcat(testFolder,'phiInit'), reshape(phiInit,1,[]), delimiter);
diff --git a/src/test/generate_test_data/generateRunSaveTest_constructionModelesLassoRank.m b/src/test/generate_test_data/generateRunSaveTest_constructionModelesLassoRank.m
new file mode 100644 (file)
index 0000000..80d9db3
--- /dev/null
@@ -0,0 +1,59 @@
+function[] = generateRunSaveTest_constructionModelesLassoRank(n, p, m, k, L, mini, maxi, gamma, rangmin, rangmax, varargin)
+
+       %set defaults for optional inputs
+       optargs = {200 15 10 3 12 5 10 1.0 3 6};
+       %replace defaults by user parameters
+       optargs(1:length(varargin)) = varargin;
+       [n, p, m, k, L, mini, maxi, gamma, rangmin, rangmax] = optargs{:};
+       mini = int64(mini);
+       maxi = int64(maxi);
+       rangmin = int64(rangmin);
+       rangmax = int64(rangmax);
+       tau = 1e-6;
+
+       Pi = zeros(k,L);
+       for l=1:L
+               Pi(:,l) = (1.0/k)*ones(1,k);
+       end
+       Rho = zeros(m,m,k,L);
+       for l=1:L
+               for r=1:k
+                       Rho(:,:,r,l) = eye(m);
+               end
+       end
+
+       %Generate X and Y
+       [X, Y, ~] = generateIOdefault(n, p, m, k);
+
+       A1 = zeros(p,L,'int64');
+    for i=1:L
+        A1(:,i) = 1:p;
+    end
+
+       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+       testFolder = 'data/';
+       mkdir(testFolder);
+       delimiter = ' ';
+
+       %save inputs
+       dlmwrite(strcat(testFolder,'Rho'), reshape(Rho,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'Pi'), reshape(Pi,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'mini'), mini, delimiter);
+       dlmwrite(strcat(testFolder,'maxi'), maxi, delimiter);
+       dlmwrite(strcat(testFolder,'X'), reshape(X,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'Y'), reshape(Y,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'tau'), tau, delimiter);
+       dlmwrite(strcat(testFolder,'A1'), reshape(A1,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'rangmin'), rangmin, delimiter);
+       dlmwrite(strcat(testFolder,'rangmax'), rangmax, delimiter);
+       dlmwrite(strcat(testFolder,'dimensions'), [n,p,m,k,L], delimiter);
+
+       [phi,lvraisemblance] = constructionModelesLassoRank(Pi,Rho,mini,maxi,X,Y,tau,A1,rangmin,rangmax);
+
+       %save output
+       Size = (rangmax-rangmin+1)^k;
+       dlmwrite(strcat(testFolder,'phi'), reshape(phi,1,[]), delimiter);
+       dlmwrite(strcat(testFolder,'lvraisemblance'), reshape(lvraisemblance,1,[]), delimiter);
+
+end
@@ -13,7 +13,7 @@ function[] = generateRunSaveTest_selectiontotale(n, p, m, k, mini, maxi, gamma,
 
        %Generate phiInit,piInit,...
        [phiInit,rhoInit,piInit,gamInit] = basicInitParameters(n, p, m, k);
-       
+
        %Generate X and Y
        [X, Y, ~] = generateIOdefault(n, p, m, k);
 
@@ -21,7 +21,7 @@ function[] = generateRunSaveTest_selectiontotale(n, p, m, k, mini, maxi, gamma,
 
        testFolder = 'data/';
        mkdir(testFolder);
-       delimiter = '\n';
+       delimiter = ' ';
 
        %save inputs
        dlmwrite(strcat(testFolder,'phiInit'), reshape(phiInit,1,[]), delimiter);