distances = computeWerDists(synchrones, nbytes, endian, ncores_clust, verbose, parll)
if (verbose)
cat(paste(" algoClust2() on ",nrow(distances)," items\n", sep=""))
- medoids[ algoClust2(distances,K2), ]
+ medoids[ ,algoClust2(distances,K2) ]
}
#' computeSynchrones
}
ref_series = getRefSeries(indices)
- nb_series = nrow(ref_series)
+ nb_series = ncol(ref_series)
# Get medoids indices for this chunk of series
mi = computeMedoidsIndices(medoids@address, ref_series)
#' @param nb_series_per_chunk (Maximum) number of series to retrieve in one batch
#' @param algoClust1 Clustering algorithm for stage 1. A function which takes (data, K)
#' as argument where data is a matrix in columns and K the desired number of clusters,
-#' and outputs K medoids ranks. Default: PAM.
-#' In our method, this function is called on iterated medoids during stage 1
+#' and outputs K medoids ranks. Default: PAM. In our method, this function is called
+#' on iterated medoids during stage 1
#' @param algoClust2 Clustering algorithm for stage 2. A function which takes (dists, K)
#' as argument where dists is a matrix of distances and K the desired number of clusters,
-#' and outputs K clusters representatives (curves). Default: PAM.
-#' In our method, this function is called on a matrix of K1 x K1 (WER) distances computed
-# between synchrones
+#' and outputs K medoids ranks. Default: PAM. In our method, this function is called
+#' on a matrix of K1 x K1 (WER) distances computed between synchrones
#' @param nb_items_clust1 (~Maximum) number of items in input of the clustering algorithm
#' for stage 1. At worst, a clustering algorithm might be called with ~2*nb_items_clust1
#' items; but this could only happen at the last few iterations.
claws <- function(getSeries, K1, K2, nb_series_per_chunk,
nb_items_clust1=7*K1,
algoClust1=function(data,K) cluster::pam(t(data),K,diss=FALSE)$id.med,
- algoClust2=function(dists,K) t( cluster::pam(dists,K,diss=TRUE)$medoids ),
+ algoClust2=function(dists,K) cluster::pam(dists,K,diss=TRUE)$id.med,
wav_filt="d8", contrib_type="absolute",
WER="end",sync_mean=TRUE,
random=TRUE,
if (verbose)
cat("...Compute contributions and serialize them\n")
nb_curves = binarizeTransform(getSeries,
- function(series) curvesToContribs(series, wf, ctype),
+ function(series) curvesToContribs(series, wav_filt, contrib_type),
contribs_file, nb_series_per_chunk, nbytes, endian)
getContribs = function(indices) getDataInFile(indices, contribs_file, nbytes, endian)
if (verbose)
cat("...Serialize contributions computed on synchrones\n")
ignored = binarizeTransform(getSeries,
- function(series) curvesToContribs(series, wf, ctype),
+ function(series) curvesToContribs(series, wav_filt, contrib_type),
contribs_file, nb_series_per_chunk, nbytes, endian)
}
#' @export
curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE)
{
+ series = as.matrix(series) #1D serie could occur
L = nrow(series)
- if (coin) browser()
D = ceiling( log2(L) )
nb_sample_points = 2^D
apply(series, 2, function(x) {
interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
- W = wavelets::dwt(interpolated_curve, filter=wf, D)@W
+ W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
if (contrib_type!="absolute")
nrj = nrj / sum(nrj)
ctype = "absolute"
getContribs = function(indices) curvesToContribs(series[,indices],wf,ctype)
require("cluster", quietly=TRUE)
- browser()
- algoClust1 = function(contribs,K) cluster::pam(contribs,K,diss=FALSE)$id.med
+ algoClust1 = function(contribs,K) cluster::pam(t(contribs),K,diss=FALSE)$id.med
indices1 = clusteringTask1(1:n, getContribs, K1, algoClust1, 75, verbose=TRUE, parll=FALSE)
medoids_K1 = getSeries(indices1)
s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
series = matrix(nrow=L, ncol=n)
for (i in seq_len(n))
- series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01)
+ series[,i] = s[[I(i,K1)]] + rnorm(L,sd=0.01)
getRefSeries = function(indices) {
indices = indices[indices <= n]
if (length(indices)>0) series[,indices] else NULL
}
# Artificially simulate 60 medoids - perfect situation, all equal to one of the refs
medoids_K1 = bigmemory::as.big.matrix( sapply( 1:K1, function(i) s[[I(i,K1)]] ) )
- medoids_K2 = clusteringTask2(medoids_K1, K2, getRefSeries, n, 75, verbose=TRUE, parll=FALSE)
+ algoClust2 = function(dists,K) cluster::pam(dists,K,diss=TRUE)$id.med
+ medoids_K2 = clusteringTask2(medoids_K1, K2, algoClust2, getRefSeries,
+ n, 75, sync_mean=TRUE, verbose=TRUE, parll=FALSE)
expect_equal(dim(medoids_K2), c(L,K2))
# Not easy to evaluate result: at least we expect it to be better than random selection of