#include <stdlib.h>
-#include <omp.h>
+//#include <omp.h>
// Index matrix (by columns)
#define mi(i, j, d1, d2) (j*d1 + i)
// with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, double* W)
{
- int nc=*pnc, n=*pn, d=*pd;
+ int n=*pn, d=*pd; //,nc=*pnc
int dim = d + d*d + d*d*d;
//double* W = (double*)malloc(dim*dim*sizeof(double));
W[j*dim+k] = 0.0;
}
double* g = (double*)malloc(dim*sizeof(double));
- omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs());
- #pragma omp parallel for
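+ // TODO: make the OpenMP loop deterministic before re-enabling it (parallel runs currently give random results, presumably because all threads share g and W)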
+// omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs());
+// #pragma omp parallel for
for (int i=0; i<n; i++)
{
// g == gi:
// This final nested loop is very costly. Some basic optimisations:
double gj = g[j];
int baseIdx = j * dim;
- #pragma GCC unroll 32
+// #pragma GCC unroll 32
for (int k=j; k>=0; k--)
W[baseIdx+k] += gj * g[k];
}
// Normalize W: x 1/n
for (int j=0; j<dim; j++)
{
- for (int k=0; k<=j; k++)
+ for (int k=j; k<dim; k++)
W[mi(j,k,dim,dim)] /= n;
}
- // Symmetrize W: W[j,k] = W[k,j] for k > j
+ // Symmetrize W: W[k,j] = W[j,k] for k > j
for (int j=0; j<dim; j++)
{
- for (int k=j+1: k<dim; k++)
- W[mi(j,k,dim,dim)] = W[mi(k,j,dim,dim)];
+ for (int k=j+1; k<dim; k++)
+ W[mi(k,j,dim,dim)] = W[mi(j,k,dim,dim)];
}
free(g);
}