X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=pkg%2Fsrc%2Ffunctions.c;h=94ea12dd5e52036f45b6792160944d851329a874;hb=5af71d43f3f2dba21c6667939fcff88923af3b7b;hp=0d632d3828216727b97e7cc676496e57ad0e87ec;hpb=cbd88fe5729bf206a784238a2637aa60e697fcdc;p=morpheus.git diff --git a/pkg/src/functions.c b/pkg/src/functions.c index 0d632d3..94ea12d 100644 --- a/pkg/src/functions.c +++ b/pkg/src/functions.c @@ -1,55 +1,115 @@ #include +#include -//index matrix (by columns) -int mi(int i, int j, int d1, int d2) -{ - return j*d1+i; -} +// Index matrix (by columns) +#define mi(i, j, d1, d2) (j*d1 + i) -//index 3-tensor (by columns, matrices ordered by last dim) -int ti(int i, int j, int k, int d1, int d2, int d3) -{ - return k*d1*d2 + j*d1 + i; -} +// Index 3-tensor (by columns, matrices ordered by last dim) +#define ti(i, j, k, d1, d2, d3) (k*d1*d2 + j*d1 + i) -// Emprical cross-moment of order 2 between X size nxd and Y size n +// Empirical cross-moment of order 2 between X size nxd and Y size n void Moments_M2(double* X, double* Y, int* pn, int* pd, double* M2) { - int n=*pn, d=*pd; - //double* M2 = (double*)calloc(d*d,sizeof(double)); - - // M2 = E[Y*X^*2] - E[Y*e^*2] = E[Y (X^*2 - I)] - for (int j=0; j= 1 ? nc : omp_get_num_procs()); + #pragma omp parallel for + for (int i=0; i=0; j--) + { + // This final nested loop is very costly. Some basic optimisations: + double gj = g[j]; + int baseIdx = j * dim; + #pragma GCC unroll 100 + for (int k=dim-1; k>=0; k--) + W[baseIdx+k] += gj * g[k]; + } + } + // Normalize W: x 1/n + for (int j=0; j