| 1 | #include <stdlib.h> |
| 2 | #include <omp.h> |
| 3 | |
| 4 | // Index matrix (by columns) |
| 5 | #define mi(i, j, d1, d2) (j*d1 + i) |
| 6 | |
| 7 | // Index 3-tensor (by columns, matrices ordered by last dim) |
| 8 | #define ti(i, j, k, d1, d2, d3) (k*d1*d2 + j*d1 + i) |
| 9 | |
| 10 | // Empirical cross-moment of order 2 between X size nxd and Y size n |
| 11 | void Moments_M2(double* X, double* Y, int* pn, int* pd, double* M2) |
| 12 | { |
| 13 | int n=*pn, d=*pd; |
| 14 | //double* M2 = (double*)calloc(d*d,sizeof(double)); |
| 15 | |
| 16 | // M2 = E[Y*X^*2] - E[Y*e^*2] = E[Y (X^*2 - I)] |
| 17 | for (int j=0; j<d; j++) |
| 18 | { |
| 19 | for (int i=0; i<n; i++) |
| 20 | { |
| 21 | M2[mi(j,j,d,d)] -= Y[i] / n; |
| 22 | for (int k=0; k<d; k++) |
| 23 | M2[mi(j,k,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)] / n; |
| 24 | } |
| 25 | } |
| 26 | } |
| 27 | |
| 28 | // Empirical cross-moment of order 3 between X size nxd and Y size n |
| 29 | void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3) |
| 30 | { |
| 31 | int n=*pn, d=*pd; |
| 32 | //double* M3 = (double*)calloc(d*d*d,sizeof(double)); |
| 33 | |
| 34 | // M3 = E[Y*X^*3] - E[Y*e*X*e] - E[Y*e*e*X] - E[Y*X*e*e] |
| 35 | for (int j=0; j<d; j++) |
| 36 | { |
| 37 | for (int k=0; k<d; k++) |
| 38 | { |
| 39 | for (int i=0; i<n; i++) |
| 40 | { |
| 41 | double tensor_elt = Y[i]*X[mi(i,k,n,d)] / n; |
| 42 | M3[ti(j,k,j,d,d,d)] -= tensor_elt; |
| 43 | M3[ti(j,j,k,d,d,d)] -= tensor_elt; |
| 44 | M3[ti(k,j,j,d,d,d)] -= tensor_elt; |
| 45 | for (int o=0; o<d; o++) |
| 46 | M3[ti(j,k,o,d,d,d)] += Y[i] * X[mi(i,j,n,d)]*X[mi(i,k,n,d)]*X[mi(i,o,n,d)] / n; |
| 47 | } |
| 48 | } |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | // W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) ) |
| 53 | // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments |
| 54 | void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, double* W) |
| 55 | { |
| 56 | int nc=*pnc, n=*pn, d=*pd; |
| 57 | int dim = d + d*d + d*d*d; |
| 58 | //double* W = (double*)malloc(dim*dim*sizeof(double)); |
| 59 | |
| 60 | // (Re)Initialize W: |
| 61 | for (int j=0; j<dim; j++) |
| 62 | { |
| 63 | for (int k=0; k<dim; k++) |
| 64 | W[j*dim+k] = 0.0; |
| 65 | } |
| 66 | double* g = (double*)malloc(dim*sizeof(double)); |
| 67 | // TODO: stabilize this (for now, random result) |
| 68 | // omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs()); |
| 69 | // #pragma omp parallel for |
| 70 | for (int i=0; i<n; i++) |
| 71 | { |
| 72 | // g == gi: |
| 73 | for (int j=0; j<d; j++) |
| 74 | g[j] = Y[i] * X[mi(i,j,n,d)] - M[j]; |
| 75 | for (int j=d; j<d+(d*d); j++) |
| 76 | { |
| 77 | int idx1 = (j-d) % d; //num row |
| 78 | int idx2 = ((j-d) - idx1) / d; //num col |
| 79 | g[j] = 0.0; |
| 80 | if (idx1 == idx2) |
| 81 | g[j] -= Y[i]; |
| 82 | g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j]; |
| 83 | } |
| 84 | for (int j=d+d*d; j<dim; j++) |
| 85 | { |
| 86 | int idx1 = (j-d-d*d) % d; //num row |
| 87 | int idx2 = ((j-d-d*d - idx1) / d) %d; //num col |
| 88 | int idx3 = (((j-d-d*d - idx1) / d) - idx2) / d; //num "depth" |
| 89 | g[j] = 0.0; |
| 90 | if (idx1 == idx2) |
| 91 | g[j] -= Y[i] * X[mi(i,idx3,n,d)]; |
| 92 | if (idx1 == idx3) |
| 93 | g[j] -= Y[i] * X[mi(i,idx2,n,d)]; |
| 94 | if (idx2 == idx3) |
| 95 | g[j] -= Y[i] * X[mi(i,idx1,n,d)]; |
| 96 | g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j]; |
| 97 | } |
| 98 | // Add 1/n t(gi) %*% gi to W |
| 99 | for (int j=0; j<dim; j++) |
| 100 | { |
| 101 | // This final nested loop is very costly. Some basic optimisations: |
| 102 | double gj = g[j]; |
| 103 | int baseIdx = j * dim; |
| 104 | #pragma GCC unroll 32 |
| 105 | for (int k=j; k>=0; k--) |
| 106 | W[baseIdx+k] += gj * g[k]; |
| 107 | } |
| 108 | } |
| 109 | // Normalize W: x 1/n |
| 110 | for (int j=0; j<dim; j++) |
| 111 | { |
| 112 | for (int k=j; k<dim; k++) |
| 113 | W[mi(j,k,dim,dim)] /= n; |
| 114 | } |
| 115 | // Symmetrize W: W[k,j] = W[j,k] for k > j |
| 116 | for (int j=0; j<dim; j++) |
| 117 | { |
| 118 | for (int k=j+1; k<dim; k++) |
| 119 | W[mi(k,j,dim,dim)] = W[mi(j,k,dim,dim)]; |
| 120 | } |
| 121 | free(g); |
| 122 | } |