4 // Index matrix (by columns)
5 #define mi(i, j, d1, d2) (j*d1 + i)
7 // Index 3-tensor (by columns, matrices ordered by last dim)
8 #define ti(i, j, k, d1, d2, d3) (k*d1*d2 + j*d1 + i)
10 // Empirical cross-moment of order 2 between X size nxd and Y size n
11 void Moments_M2(double* X
, double* Y
, int* pn
, int* pd
, double* M2
)
14 //double* M2 = (double*)calloc(d*d,sizeof(double));
16 // M2 = E[Y*X^*2] - E[Y*e^*2] = E[Y (X^*2 - I)]
17 for (int j
=0; j
<d
; j
++)
19 for (int i
=0; i
<n
; i
++)
21 M2
[mi(j
,j
,d
,d
)] -= Y
[i
] / n
;
22 for (int k
=0; k
<d
; k
++)
23 M2
[mi(j
,k
,d
,d
)] += Y
[i
] * X
[mi(i
,j
,n
,d
)]*X
[mi(i
,k
,n
,d
)] / n
;
28 // Empirical cross-moment of order 3 between X size nxd and Y size n
29 void Moments_M3(double* X
, double* Y
, int* pn
, int* pd
, double* M3
)
32 //double* M3 = (double*)calloc(d*d*d,sizeof(double));
34 // M3 = E[Y*X^*3] - E[Y*e*X*e] - E[Y*e*e*X] - E[Y*X*e*e]
35 for (int j
=0; j
<d
; j
++)
37 for (int k
=0; k
<d
; k
++)
39 for (int i
=0; i
<n
; i
++)
41 double tensor_elt
= Y
[i
]*X
[mi(i
,k
,n
,d
)] / n
;
42 M3
[ti(j
,k
,j
,d
,d
,d
)] -= tensor_elt
;
43 M3
[ti(j
,j
,k
,d
,d
,d
)] -= tensor_elt
;
44 M3
[ti(k
,j
,j
,d
,d
,d
)] -= tensor_elt
;
45 for (int o
=0; o
<d
; o
++)
46 M3
[ti(j
,k
,o
,d
,d
,d
)] += Y
[i
] * X
[mi(i
,j
,n
,d
)]*X
[mi(i
,k
,n
,d
)]*X
[mi(i
,o
,n
,d
)] / n
;
52 // W = 1/N sum( t(g(Zi,theta)) g(Zi,theta) )
53 // with g(Zi, theta) = i-th contribution to all moments (size dim) - real moments
54 void Compute_Omega(double* X
, int* Y
, double* M
, int* pnc
, int* pn
, int* pd
, double* W
)
56 int nc
=*pnc
, n
=*pn
, d
=*pd
;
57 int dim
= d
+ d
*d
+ d
*d
*d
;
58 //double* W = (double*)malloc(dim*dim*sizeof(double));
61 for (int j
=0; j
<dim
; j
++)
63 for (int k
=0; k
<dim
; k
++)
66 double* g
= (double*)malloc(dim
*sizeof(double));
67 omp_set_num_threads(nc
>= 1 ? nc
: omp_get_num_procs());
68 #pragma omp parallel for
69 for (int i
=0; i
<n
; i
++)
72 for (int j
=0; j
<d
; j
++)
73 g
[j
] = Y
[i
] * X
[mi(i
,j
,n
,d
)] - M
[j
];
74 for (int j
=d
; j
<d
+(d
*d
); j
++)
76 int idx1
= (j
-d
) % d
; //num row
77 int idx2
= ((j
-d
) - idx1
) / d
; //num col
81 g
[j
] += Y
[i
] * X
[mi(i
,idx1
,n
,d
)]*X
[mi(i
,idx2
,n
,d
)] - M
[j
];
83 for (int j
=d
+d
*d
; j
<dim
; j
++)
85 int idx1
= (j
-d
-d
*d
) % d
; //num row
86 int idx2
= ((j
-d
-d
*d
- idx1
) / d
) %d
; //num col
87 int idx3
= (((j
-d
-d
*d
- idx1
) / d
) - idx2
) / d
; //num "depth"
90 g
[j
] -= Y
[i
] * X
[mi(i
,idx3
,n
,d
)];
92 g
[j
] -= Y
[i
] * X
[mi(i
,idx2
,n
,d
)];
94 g
[j
] -= Y
[i
] * X
[mi(i
,idx1
,n
,d
)];
95 g
[j
] += Y
[i
] * X
[mi(i
,idx1
,n
,d
)]*X
[mi(i
,idx2
,n
,d
)]*X
[mi(i
,idx3
,n
,d
)] - M
[j
];
97 // Add 1/n t(gi) %*% gi to W
98 for (int j
=0; j
<dim
; j
++)
100 // This final nested loop is very costly. Some basic optimisations:
102 int baseIdx
= j
* dim
;
103 #pragma GCC unroll 32
104 for (int k
=j
; k
>=0; k
--)
105 W
[baseIdx
+k
] += gj
* g
[k
];
108 // Normalize W: x 1/n
109 for (int j
=0; j
<dim
; j
++)
111 for (int k
=j
; k
<dim
; k
++)
112 W
[mi(j
,k
,dim
,dim
)] /= n
;
114 // Symmetrize W: W[k,j] = W[j,k] for k > j
115 for (int j
=0; j
<dim
; j
++)
117 for (int k
=j
+1; k
<dim
; k
++)
118 W
[mi(k
,j
,dim
,dim
)] = W
[mi(j
,k
,dim
,dim
)];