1 #' Direct clustering from a neighborhoods graph, or get regions from (Poisson)
2 #' distribution parameters optimization, using convex relaxation.
4 #' @param method Global method: "direct" or "convex"
5 #' @param M Matrix of observations in rows, the two last columns
6 #' corresponding to geographic coordinates;
7 #' set to NULL to use our initial dataset (625 rows / 9 years)
8 #' @param k Number of neighbors
9 #' @param alpha Weight parameter for intra-neighborhoods distance computations;
10 #' 0 = take only geographic coordinates into account;
11 #' 1 = take only observations over the years into account;
12 #' in-between : several levels of compromise;
13 #' -1 or any negative value : use a heuristic to choose alpha.
14 #' @param gmode Neighborhood type. 0 = reduced [mutual] kNN; 1 = augmented kNN (symmetric);
15 #' 2 = normal kNN; 3 = one NN in each quadrant; (NON-symmetric).
16 #' NOTE: gmode==3 automatically sets k==4 (at most!)
17 #' @param K Number of clusters
18 #' @param dtype Distance type, in {"simple","spath","ectd"}.
19 #' NOTE: better avoid "simple" if gmode>=2
20 #' @param cmeth Clustering method, in {"KM","HC","spec"} for k-means (distances based)
21 #' or hierarchical clustering, or spectral clustering (only if gmode>=2)
22 #' @param pcoef Penalty value for convex optimization [default: 1.0]
23 #' @param h Step in the min LL algorithm [default: 1e-3]
24 #' @param eps Threshold to stop min.LL iterations [default: 1e-3]
25 #' @param maxit Maximum number of iterations in the min LL algo [default: 1e3]
26 #' @param showLL Print trace of log-likelihood evolution [default: true]
27 #' @param disp True [default] for interactive display (otherwise nothing gets plotted)
28 #' @return list with the following entries. M: data matrix in input; NI: computed neighborhoods;
29 #' dists: computed distances matrix; clusts: partition into K clusters, as an integer vector;
30 #' cxpar: parameters obtained after convex optimization (if applicable)
33 #' cvr = findSyncVarRegions("convex",M=NULL,k=10,alpha=0.1,gmode=1,K=5,dtype="spath",cmeth="HC")
34 #' drawMapWithSitez(cvr$M, cvr$clusters)
35 #' drawNeighboroodGraph(cvr$M, cvr$NI)
37 findSyncVarRegions = function(method, M, k, alpha, gmode, K, dtype, cmeth,
38 pcoef=1.0, h=1e-3, eps=1e-3, maxit=1e3, showLL=TRUE, disp=TRUE)
40 #get matrix M if not directly provided
43 data("example", package="synclust")
47 M = as.matrix(read.table(M))
52 #pretreatment for neighborhoods search: standardize M columns
53 #TODO: maybe apply only on coordinates columns ?
56 #get neighborhoods [FALSE because NOT simpleDists; see C code]
57 NI = .Call("getNeighbors", std$M, k, alpha, gmode, FALSE)
59 #optional intermediate display : map + graph (monocolor)
61 promptForMapDisplay("interm", M[,(m-1):m], NIix=NI$ix)
64 distances = matrix(NA,nrow=n,ncol=n)
67 ## DIRECT CLUSTERING ##
71 stop("'gmode' must be 0 or 1 for direct clustering")
73 stop("'dtype' cannot be set to \"simple\" for direct (graph!) clustering")
75 #find connected components in the graph defined by NI
76 cc = reordering(.Call("getConnectedComponents", NI$ix))
79 stop(paste("ABORT: too many connex components (found ",nbC,")",sep=''))
81 print(paste("*** WARNING:",nbC,"connex components ***"))
84 #for each connected component...
87 indices = (1:n)[cc == i]
90 next #nothing to do with such a small component
94 doClust = readline(paste(">>> cluster current component of cardinal",nc,"/",n,"? (y/n)\n"))
96 K = readline(">>> into how many groups ? (int >= 2)\n")
101 #0] remap NI in current connex component
102 locNI = remapNeighbors(NI, indices)
104 #1] determine similarity and distance matrices (e.g. using a random walk)
105 locDists = getDistances(dtype, locNI)
106 distances[indices,indices] = locDists
108 #2] cluster data inside connex component according to distance matrix
109 locClusters = getClusters(locDists, cmeth, K)
110 maxInd = max(clusters)
111 clusters[indices] = locClusters + maxInd #avoid indices overlaps
115 ## CONVEX RELAXATION ##
116 else if (method=="convex")
118 #preliminary: remove NA's by averaging over each serie's values
121 #use NI$ix and matrix M to estimate initial parameters,
122 #and then iterate until convergence to get f + theta
123 #theta == mean observations count at each site s
124 #f == estimated variations at each site ("time-series" of T points)
125 cxpar = .Call("getVarsWithConvexOptim",
126 M[,1:(m-2)], NI$ix, pcoef, h, eps, maxit, (gmode <= 1), showLL)
127 f = cxpar$f #the only one we use (others can be checked by user later)
129 #cluster "time-series" f, using simple kmeans/HC, spect.clust,
130 #or [in a graph] KM or HC, after redefining a NI (using f only)
134 #use R core functions
136 clusters = kmeans(f, K, iter.max=100, nstart=10)$cluster
137 else if (cmeth=="HC")
139 hct = hclust(dist(f), method="ward")
140 clusters = cutree(hct, K)
142 else if (cmeth=="spec")
145 clusters = as.integer(specc(f, K, kpar="automatic"))
151 # recompute NI from repaired/smoothed data [simpleDists=TRUE, no graph dists]
152 #NOTE: gmode==1, augmented kNN (arbitrary, but should be symmetric)
153 NI = .Call("getNeighbors", f, k, alpha, 1, TRUE)
155 #find connected components in the graph defined by NI
156 cc = reordering(.Call("getConnectedComponents", NI$ix))
160 stop(paste("ABORT: too many connex components (found ",nbC,")",sep=''))
162 print(paste("*** WARNING:",nbC,"connex components ***"))
165 #for each connected component...
168 indices = (1:n)[cc == i]
171 next #nothing to do with such a small component
175 doClust = readline(paste(">>> cluster current component of cardinal",nc,"/",n,"? (y/n)\n"))
177 K = readline(">>> into how many groups ? (int >= 2)\n")
182 #0] remap NI in current connex component
183 locNI = remapNeighbors(NI, indices)
185 #1] determine similarity and distance matrices (e.g. using a random walk)
186 locDists = getDistances(dtype, locNI)
187 distances[indices,indices] = locDists
189 #2] cluster data inside connex component according to distance matrix
190 locClusters = getClusters(locDists, cmeth, K)
191 maxInd = max(clusters)
192 clusters[indices] = locClusters + maxInd #avoid indices overlaps
197 clusters = reordering(clusters)
198 #optional final display : map with clusters colors
200 promptForMapDisplay("final", M[,(m-1):m], clusters=clusters)
202 #give back matrix M as given to the function
203 M = destandardize(std)
205 return (list("M"=M, "NI"=NI, "dists"=distances, "clusts"=clusters, "cxpar"=cxpar))