projects
/
morpheus.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Add a number_of_cores parameter for OpenMP // in Compute_Omega
[morpheus.git]
/
pkg
/
src
/
functions.c
diff --git
a/pkg/src/functions.c
b/pkg/src/functions.c
index
b634c98
..
94ea12d
100644
(file)
--- a/
pkg/src/functions.c
+++ b/
pkg/src/functions.c
@@
-1,4
+1,5
@@
#include <stdlib.h>
#include <stdlib.h>
+#include <omp.h>
// Index matrix (by columns)
#define mi(i, j, d1, d2) (j*d1 + i)
// Index matrix (by columns)
#define mi(i, j, d1, d2) (j*d1 + i)
@@
-50,9
+51,9
@@
void Moments_M3(double* X, double* Y, int* pn, int* pd, double* M3)
// W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
// with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
// W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
// with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
-void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
+void Compute_Omega(double* X, int* Y, double* M, int* pn
c, int* pn
, int* pd, double* W)
{
{
- int n=*pn, d=*pd;
+ int n
c=*pnc, n
=*pn, d=*pd;
int dim = d + d*d + d*d*d;
//double* W = (double*)malloc(dim*dim*sizeof(double));
int dim = d + d*d + d*d*d;
//double* W = (double*)malloc(dim*dim*sizeof(double));
@@
-63,6
+64,8
@@
void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
W[j*dim+k] = 0.0;
}
double* g = (double*)malloc(dim*sizeof(double));
W[j*dim+k] = 0.0;
}
double* g = (double*)malloc(dim*sizeof(double));
+ omp_set_num_threads(nc >= 1 ? nc : omp_get_num_procs());
+ #pragma omp parallel for
for (int i=0; i<n; i++)
{
// g == gi:
for (int i=0; i<n; i++)
{
// g == gi:
@@
-92,11
+95,21
@@
void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
}
// Add 1/n t(gi) %*% gi to W
g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
}
// Add 1/n t(gi) %*% gi to W
- for (int j=
0; j<dim; j++
)
+ for (int j=
dim-1; j>=0; j--
)
{
{
- for (int k=0; k<dim; k++)
- W[j*dim+k] += g[j] * g[k] / n;
+ // This final nested loop is very costly. Some basic optimisations:
+ double gj = g[j];
+ int baseIdx = j * dim;
+ #pragma GCC unroll 100
+ for (int k=dim-1; k>=0; k--)
+ W[baseIdx+k] += gj * g[k];
}
}
}
}
+ // Normalize W: x 1/n
+ for (int j=0; j<dim; j++)
+ {
+ for (int k=0; k<dim; k++)
+ W[mi(j,k,dim,dim)] /= n;
+ }
free(g);
}
free(g);
}