X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=2af6f90c00c85afbdc74a7e7237426ed4bd2cfa3;hp=e00393318fff4f58afaec1ac9dbe25d85b6e2d04;hb=9f05a4a0b703deffd7bdb9cd99b0aaa2246a5c83;hpb=0fe757f750f51e580d2c5a7b7f7df87cc405d12d diff --git a/epclust/R/main.R b/epclust/R/main.R index e003933..2af6f90 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -42,13 +42,12 @@ #' @param nb_series_per_chunk (Maximum) number of series to retrieve in one batch #' @param algoClust1 Clustering algorithm for stage 1. A function which takes (data, K) #' as argument where data is a matrix in columns and K the desired number of clusters, -#' and outputs K medoids ranks. Default: PAM. -#' In our method, this function is called on iterated medoids during stage 1 +#' and outputs K medoids ranks. Default: PAM. In our method, this function is called +#' on iterated medoids during stage 1 #' @param algoClust2 Clustering algorithm for stage 2. A function which takes (dists, K) #' as argument where dists is a matrix of distances and K the desired number of clusters, -#' and outputs K clusters representatives (curves). Default: PAM. -#' In our method, this function is called on a matrix of K1 x K1 (WER) distances computed -# between synchrones +#' and outputs K medoids ranks. Default: PAM. In our method, this function is called +#' on a matrix of K1 x K1 (WER) distances computed between synchrones #' @param nb_items_clust1 (~Maximum) number of items in input of the clustering algorithm #' for stage 1. At worst, a clustering algorithm might be called with ~2*nb_items_clust1 #' items; but this could only happen at the last few iterations. @@ -141,7 +140,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, nb_items_clust1=7*K1, algoClust1=function(data,K) cluster::pam(t(data),K,diss=FALSE)$id.med, - algoClust2=function(dists,K) t( cluster::pam(dists,K,diss=TRUE)$medoids ), + algoClust2=function(dists,K) cluster::pam(dists,K,diss=TRUE)$id.med, wav_filt="d8", contrib_type="absolute", WER="end",sync_mean=TRUE, random=TRUE, @@ -211,7 +210,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, if (verbose) cat("...Compute contributions and serialize them\n") nb_curves = binarizeTransform(getSeries, - function(series) curvesToContribs(series, wf, ctype), + function(series) curvesToContribs(series, wav_filt, contrib_type), contribs_file, nb_series_per_chunk, nbytes, endian) getContribs = function(indices) getDataInFile(indices, contribs_file, nbytes, endian) @@ -313,7 +312,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, if (verbose) cat("...Serialize contributions computed on synchrones\n") ignored = binarizeTransform(getSeries, - function(series) curvesToContribs(series, wf, ctype), + function(series) curvesToContribs(series, wav_filt, contrib_type), contribs_file, nb_series_per_chunk, nbytes, endian) } @@ -350,13 +349,13 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, #' @export curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) { + series = as.matrix(series) #1D serie could occur L = nrow(series) - if (coin) browser() D = ceiling( log2(L) ) nb_sample_points = 2^D apply(series, 2, function(x) { interpolated_curve = spline(1:L, x, n=nb_sample_points)$y - W = wavelets::dwt(interpolated_curve, filter=wf, D)@W + W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) if (contrib_type!="absolute") nrj = nrj / sum(nrj)