first commit
[synclust.git] / R / distances.R
1 #build similarity matrix W (NOTE : sparse matrix ==> optimizations later)
2 getSimilarityMatrix = function(NI)
3 {
4 # using a local sigma would be nice, but break W symmetry,
5 # which cannot easily be repaired then (??!)
6 # ==> we use a global sigma, with a very simple heuristic
7
8 n = length(NI$ix)
9 distances = c()
10 for (i in 1:n) distances = c(distances,NI$ds[[i]])
11 distances = unique(distances)
12 sigma2 = median(distances)^2 #for example...
13
14 W = matrix(0.0,nrow=n,ncol=n)
15 for (i in 1:n)
16 W[ i, NI$ix[[i]] ] = exp( - NI$ds[[i]]^2 / sigma2 )
17
18 return (W)
19 }
20
21 #epsilon constant, used as a zero threshold
22 EPS = 100 * .Machine$double.eps
23
24 #Moore-Penrose pseudo inverse
25 mppsinv = function(M)
26 {
27 s = svd(M)
28 sdiag = s$d ; sdiag[sdiag < EPS] = Inf
29 p = min(nrow(M),ncol(M))
30 sdiag = diag(1.0 / sdiag, p)
31 return ((s$v) %*% sdiag %*% t(s$u))
32 }
33
34 #get distance matrix from data and similarity : Commute Time
35 getECTDistances = function(NI)
36 {
37 n = length(NI$ix) ; seqVect = 1:n
38 if (n <= 1) return (0.0) #nothing to do...
39
40 #get laplacian (...inverse) :
41 W = getSimilarityMatrix(NI)
42 invLap = mppsinv(diag(rowSums(W)) - W)
43
44 #...and distances
45 ectd = matrix(0.0, nrow=n, ncol=n)
46 for (ij in 1:n)
47 {
48 ectd[ij,] = ectd[ij,] + invLap[ij,ij]
49 ectd[,ij] = ectd[,ij] + invLap[ij,ij]
50 }
51 ectd = ectd - 2*invLap
52 return (ectd)
53 }
54
55 # Call Dijsktra algorithm on every vertex to build distances matrix
56 getShortestPathDistances = function(NI)
57 {
58 n = length(NI$ix)
59 distancesIn = matrix(NA,nrow=n,ncol=n)
60 for (i in 1:n)
61 distancesIn[i,NI$ix[[i]]] = NI$ds[[i]]
62
63 distancesOut = matrix(nrow=n, ncol=n)
64 for (i in 1:n)
65 distancesOut[i,] = .Call("dijkstra", distancesIn, i)
66 return (distancesOut)
67 }
68
69 ## MAIN CALL to get distances matrix
70 getDistances = function(dtype, NI)
71 {
72 distances = matrix()
73 if (dtype=="spath")
74 distances = getShortestPathDistances(NI)
75 else if (dtype=="ectd")
76 distances = getECTDistances(NI)
77
78 diag(distances) = 0.0 #distances to self are zero
79 return (distances)
80 }