From 9fdd3e5f57aa1fd0f839214d9cbfac022a8739a8 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 14 Jan 2020 19:37:33 +0100
Subject: [PATCH] It's enough to compute half of W (complete by symmetry)

---
Review notes (this section is not part of the commit message):

- v2: fix the header of the new symmetrize loop, which read
  `for (int k=j+1: k<dim; k++)` (':' instead of ';') and did not compile.
- What the hunk does: the accumulation loop now adds gj * g[k] only for
  k <= j, the normalization divides only entries with k <= j by n, and the
  new final loop fills the entries with k > j from the mirrored position.
- NOTE(review): the accumulation writes W[j*dim + k] directly, whereas the
  normalize/symmetrize loops go through mi(j,k,dim,dim). The three loops
  address the same half of W only if mi(j,k,dim,dim) == j*dim + k. mi() is
  defined outside this hunk -- TODO confirm its index order before merging.
- NOTE(review): leading whitespace of the hunk lines is assumed to be tabs;
  if the target file indents differently, apply with
  `git apply --ignore-whitespace`.

 pkg/src/functions.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pkg/src/functions.c b/pkg/src/functions.c
index 94ea12d..6444b86 100644
--- a/pkg/src/functions.c
+++ b/pkg/src/functions.c
@@ -95,21 +95,27 @@ void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, dou
 			g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
 		}
 		// Add 1/n t(gi) %*% gi to W
-		for (int j=dim-1; j>=0; j--)
+		for (int j=0; j<dim; j++)
 		{
 			// This final nested loop is very costly. Some basic optimisations:
 			double gj = g[j];
 			int baseIdx = j * dim;
-			#pragma GCC unroll 100
-			for (int k=dim-1; k>=0; k--)
+			#pragma GCC unroll 32
+			for (int k=j; k>=0; k--)
 				W[baseIdx+k] += gj * g[k];
 		}
 	}
 	// Normalize W: x 1/n
 	for (int j=0; j<dim; j++)
 	{
-		for (int k=0; k<dim; k++)
+		for (int k=0; k<=j; k++)
 			W[mi(j,k,dim,dim)] /= n;
 	}
+	// Symmetrize W: W[j,k] = W[k,j] for k > j
+	for (int j=0; j<dim; j++)
+	{
+		for (int k=j+1; k<dim; k++)
+			W[mi(j,k,dim,dim)] = W[mi(k,j,dim,dim)];
+	}
 	free(g);
 }
-- 
2.44.0