X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=2af6f90c00c85afbdc74a7e7237426ed4bd2cfa3;hb=9f05a4a0b703deffd7bdb9cd99b0aaa2246a5c83;hp=9e9b6411088f031a31e72bd3cdcc361430807378;hpb=0486fbadb122cb4d78c5d9f248c29800a59eb24e;p=epclust.git diff --git a/epclust/R/main.R b/epclust/R/main.R index 9e9b641..2af6f90 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -42,13 +42,12 @@ #' @param nb_series_per_chunk (Maximum) number of series to retrieve in one batch #' @param algoClust1 Clustering algorithm for stage 1. A function which takes (data, K) #' as argument where data is a matrix in columns and K the desired number of clusters, -#' and outputs K medoids ranks. Default: PAM. -#' In our method, this function is called on iterated medoids during stage 1 +#' and outputs K medoids ranks. Default: PAM. In our method, this function is called +#' on iterated medoids during stage 1 #' @param algoClust2 Clustering algorithm for stage 2. A function which takes (dists, K) #' as argument where dists is a matrix of distances and K the desired number of clusters, -#' and outputs K clusters representatives (curves). Default: PAM. -#' In our method, this function is called on a matrix of K1 x K1 (WER) distances computed -# between synchrones +#' and outputs K medoids ranks. Default: PAM. In our method, this function is called +#' on a matrix of K1 x K1 (WER) distances computed between synchrones #' @param nb_items_clust1 (~Maximum) number of items in input of the clustering algorithm #' for stage 1. At worst, a clustering algorithm might be called with ~2*nb_items_clust1 #' items; but this could only happen at the last few iterations. @@ -141,7 +140,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, nb_items_clust1=7*K1, algoClust1=function(data,K) cluster::pam(t(data),K,diss=FALSE)$id.med, - algoClust2=function(dists,K) t( cluster::pam(dists,K,diss=TRUE)$medoids ), + algoClust2=function(dists,K) cluster::pam(dists,K,diss=TRUE)$id.med, wav_filt="d8", contrib_type="absolute", WER="end",sync_mean=TRUE, random=TRUE, @@ -211,7 +210,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, if (verbose) cat("...Compute contributions and serialize them\n") nb_curves = binarizeTransform(getSeries, - function(series) curvesToContribs(series, wf, ctype), + function(series) curvesToContribs(series, wav_filt, contrib_type), contribs_file, nb_series_per_chunk, nbytes, endian) getContribs = function(indices) getDataInFile(indices, contribs_file, nbytes, endian) @@ -313,7 +312,7 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, if (verbose) cat("...Serialize contributions computed on synchrones\n") ignored = binarizeTransform(getSeries, - function(series) curvesToContribs(series, wf, ctype), + function(series) curvesToContribs(series, wav_filt, contrib_type), contribs_file, nb_series_per_chunk, nbytes, endian) } @@ -348,14 +347,15 @@ claws <- function(getSeries, K1, K2, nb_series_per_chunk, #' @return A [big.]matrix of size log(L) x n containing contributions in columns #' #' @export -curvesToContribs = function(series, wav_filt, contrib_type) +curvesToContribs = function(series, wav_filt, contrib_type, coin=FALSE) { + series = as.matrix(series) #1D serie could occur L = nrow(series) D = ceiling( log2(L) ) nb_sample_points = 2^D apply(series, 2, function(x) { interpolated_curve = spline(1:L, x, n=nb_sample_points)$y - W = wavelets::dwt(interpolated_curve, filter=wf, D)@W + W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) if (contrib_type!="absolute") nrj = nrj / sum(nrj)