From 9fdd3e5f57aa1fd0f839214d9cbfac022a8739a8 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Tue, 14 Jan 2020 19:37:33 +0100
Subject: [PATCH] It's enough to compute half of W (complete by symmetry)

---
 pkg/src/functions.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/pkg/src/functions.c b/pkg/src/functions.c
index 94ea12d..6444b86 100644
--- a/pkg/src/functions.c
+++ b/pkg/src/functions.c
@@ -95,21 +95,27 @@ void Compute_Omega(double* X, int* Y, double* M, int* pnc, int* pn, int* pd, dou
       g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
     }
     // Add 1/n t(gi) %*% gi to W
-    for (int j=dim-1; j>=0; j--)
+    for (int j=0; j<dim; j++)
     {
       // This final nested loop is very costly. Some basic optimisations:
       double gj = g[j];
       int baseIdx = j * dim;
-      #pragma GCC unroll 100
-      for (int k=dim-1; k>=0; k--)
+      #pragma GCC unroll 32
+      for (int k=j; k>=0; k--)
         W[baseIdx+k] += gj * g[k];
     }
   }
   // Normalize W: x 1/n
   for (int j=0; j<dim; j++)
   {
-    for (int k=0; k<dim; k++)
+    for (int k=0; k<=j; k++)
       W[mi(j,k,dim,dim)] /= n;
   }
+  // Symmetrize W: W[j,k] = W[k,j] for k > j
+  for (int j=0; j<dim; j++)
+  {
+    for (int k=j+1; k<dim; k++)
+      W[mi(j,k,dim,dim)] = W[mi(k,j,dim,dim)];
+  }
   free(g);
 }
-- 
2.44.0