+//void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)
+//{
+// int n=*pn, d=*pd;
+// int dim = d + d*d + d*d*d;
+// //double* W = (double*)malloc(dim*dim*sizeof(double));
+//
+// // (Re)Initialize W:
+// for (int j=0; j<dim; j++)
+// {
+// for (int k=0; k<dim; k++)
+// W[j*dim+k] = 0.0;
+// }
+// double* g = (double*)malloc(dim*sizeof(double));
+// for (int i=0; i<n; i++)
+// {
+// // g == gi:
+// for (int j=0; j<d; j++)
+// g[j] = Y[i] * X[mi(i,j,n,d)] - M[j];
+// for (int j=d; j<d+(d*d); j++)
+// {
+// int idx1 = (j-d) % d; //num row
+// int idx2 = ((j-d) - idx1) / d; //num col
+// g[j] = 0.0;
+// if (idx1 == idx2)
+// g[j] -= Y[i];
+// g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)] - M[j];
+// }
+// for (int j=d+d*d; j<dim; j++)
+// {
+// int idx1 = (j-d-d*d) % d; //num row
+// int idx2 = ((j-d-d*d - idx1) / d) %d; //num col
+// int idx3 = (((j-d-d*d - idx1) / d) - idx2) / d; //num "depth"
+// g[j] = 0.0;
+// if (idx1 == idx2)
+// g[j] -= Y[i] * X[mi(i,idx3,n,d)];
+// if (idx1 == idx3)
+// g[j] -= Y[i] * X[mi(i,idx2,n,d)];
+// if (idx2 == idx3)
+// g[j] -= Y[i] * X[mi(i,idx1,n,d)];
+// g[j] += Y[i] * X[mi(i,idx1,n,d)]*X[mi(i,idx2,n,d)]*X[mi(i,idx3,n,d)] - M[j];
+// }
+// // Add the outer product of gi with itself, scaled by 1/n, to W: W[j*dim+k] += g[j]*g[k]/n
+// for (int j=0; j<dim; j++)
+// {
+// for (int k=0; k<dim; k++)
+// W[j*dim+k] += g[j] * g[k] / n;
+// }
+// }
+// free(g);
+//}
+
+// Optimisation attempt: rewrite of the commented-out Compute_Omega above (same signature).
+void Compute_Omega(double* X, int* Y, double* M, int* pn, int* pd, double* W)