1 #example of "not too bad" parameters
13 #MAIN FUNCTION : direct clustering from a neighborhoods graph, or get regions
14 #from (Poisson) distribution parameters optimization, using convex relaxation.
15 findSyncVarRegions = function(
16 method, #global method: "direct" or "convex"
17 M, #matrix of observations in rows, the two last columns
18 #corresponding to geographic coordinates;
19 #set to NULL to use our initial dataset (625 rows / 9 years)
20 k, #number of neighbors
21 alpha, #weight parameter for intra-neighborhoods distance computations
22 #0 = take only geographic coordinates into account
23 #1 = take only observations over the years into account
24 #in-between : several levels of compromise
25 #-1 or any negative value : use a heuristic to choose alpha
26 gmode, #0 = reduced [mutual] kNN; 1 = augmented kNN; (symmetric)
27 #2 = normal kNN; 3 = one NN in each quadrant; (NON-symmetric)
28 #NOTE: gmode==3 automatically sets k==4 (at most!)
29 K, #number of clusters
30 dtype, #distance type, in {"simple","spath","ectd"}.
31 #NOTE: better avoid "simple" if gmode>=2
32 cmeth, #clustering method, in {"KM","HC","spec"} for k-means (distances based)
33 #or hierarchical clustering, or spectral clustering (only if gmode>=2)
34 pcoef=1.0, #penalty value for convex optimization
35 h=1e-3, #step in the min LL algorithm
36 eps=1e-3, #threshold to stop min.LL iterations
37 maxit=1e3, #maximum number of iterations in the min LL algo
38 showLL=TRUE, #print trace of log-likelihood evolution
39 disp=TRUE #true for interactive display (otherwise nothing gets plotted)
41 #get matrix M if not directly provided
44 data("example", package="synclust")
48 M = as.matrix(read.table(M))
53 #pretreatment for neighborhoods search: standardize M columns
54 #TODO: maybe apply only on coordinates columns ?
57 #get neighborhoods [FALSE because NOT simpleDists; see C code]
58 NI = .Call("getNeighbors", std$M, k, alpha, gmode, FALSE)
60 #optional intermediate display : map + graph (monocolor)
62 promptForMapDisplay("interm", M[,(m-1):m], NIix=NI$ix)
65 distances = matrix(NA,nrow=n,ncol=n)
68 ## DIRECT CLUSTERING ##
72 stop("'gmode' must be 0 or 1 for direct clustering")
74 stop("'dtype' cannot be set to \"simple\" for direct (graph!) clustering")
76 #find connected components in the graph defined by NI
77 cc = reordering(.Call("getConnectedComponents", NI$ix))
80 stop(paste("ABORT: too many connex components (found ",nbC,")",sep=''))
82 print(paste("*** WARNING:",nbC,"connex components ***"))
85 #for each connected component...
88 indices = (1:n)[cc == i]
91 next #nothing to do with such a small component
95 doClust = readline(paste(">>> cluster current component of cardinal",nc,"/",n,"? (y/n)\n"))
97 K = readline(">>> into how many groups ? (int >= 2)\n")
102 #0] remap NI in current connex component
103 locNI = remapNeighbors(NI, indices)
105 #1] determine similarity and distance matrices (e.g. using a random walk)
106 locDists = getDistances(dtype, locNI)
107 distances[indices,indices] = locDists
109 #2] cluster data inside connex component according to distance matrix
110 locClusters = getClusters(locDists, cmeth, K)
111 maxInd = max(clusters)
112 clusters[indices] = locClusters + maxInd #avoid indices overlaps
116 ## CONVEX RELAXATION ##
117 else if (method=="convex")
119 #preliminary: remove NA's by averaging over each serie's values
122 #use NI$ix and matrix M to estimate initial parameters,
123 #and then iterate until convergence to get f + theta
124 #theta == mean observations count at each site s
125 #f == estimated variations at each site ("time-series" of T points)
126 cxpar = .Call("getVarsWithConvexOptim",
127 M[,1:(m-2)], NI$ix, pcoef, h, eps, maxit, (gmode <= 1), showLL)
128 f = cxpar$f #the only one we use (others can be checked by user later)
130 #cluster "time-series" f, using simple kmeans/HC, spect.clust,
131 #or [in a graph] KM or HC, after redefining a NI (using f only)
135 #use R core functions
137 clusters = kmeans(f, K, iter.max=100, nstart=10)$cluster
138 else if (cmeth=="HC")
140 hct = hclust(dist(f), method="ward")
141 clusters = cutree(hct, K)
143 else if (cmeth=="spec")
146 clusters = as.integer(specc(f, K, kpar="automatic"))
152 # recompute NI from repaired/smoothed data [simpleDists=TRUE, no graph dists]
153 #NOTE: gmode==1, augmented kNN (arbitrary, but should be symmetric)
154 NI = .Call("getNeighbors", f, k, alpha, 1, TRUE)
156 #find connected components in the graph defined by NI
157 cc = reordering(.Call("getConnectedComponents", NI$ix))
161 stop(paste("ABORT: too many connex components (found ",nbC,")",sep=''))
163 print(paste("*** WARNING:",nbC,"connex components ***"))
166 #for each connected component...
169 indices = (1:n)[cc == i]
172 next #nothing to do with such a small component
176 doClust = readline(paste(">>> cluster current component of cardinal",nc,"/",n,"? (y/n)\n"))
178 K = readline(">>> into how many groups ? (int >= 2)\n")
183 #0] remap NI in current connex component
184 locNI = remapNeighbors(NI, indices)
186 #1] determine similarity and distance matrices (e.g. using a random walk)
187 locDists = getDistances(dtype, locNI)
188 distances[indices,indices] = locDists
190 #2] cluster data inside connex component according to distance matrix
191 locClusters = getClusters(locDists, cmeth, K)
192 maxInd = max(clusters)
193 clusters[indices] = locClusters + maxInd #avoid indices overlaps
198 clusters = reordering(clusters)
199 #optional final display : map with clusters colors
201 promptForMapDisplay("final", M[,(m-1):m], clusters=clusters)
203 #give back matrix M as given to the function
204 M = destandardize(std)
206 return (list("M"=M, "NI"=NI, "dists"=distances, "clusts"=clusters, "cxpar"=cxpar))