#' \item function: a custom way to retrieve the curves; it has two arguments: the start index
#' (start) and number of curves (n); see example in package vignette.
#' }
-#' @param K Number of clusters
-#' @param nb_series_per_chunk (Maximum) number of series in each group
+#' @param K1 Number of super-consumers to be found after stage 1 (K1 << N)
+#' @param K2 Number of clusters to be found after stage 2 (K2 << K1)
+#' @param ntasks Number of tasks (parallel iterations to obtain K1 medoids); default: 1.
+#' Note: ntasks << N, so that N is "roughly divisible" by N (number of series)
+#' @param nb_series_per_chunk (Maximum) number of series in each group, inside a task
#' @param min_series_per_chunk Minimum number of series in each group
#' @param writeTmp Function to write temporary wavelets coefficients (+ identifiers);
#' see defaults in defaults.R
#' @param wf Wavelet transform filter; see ?wt.filter. Default: haar
#' @param WER "end" to apply stage 2 after stage 1 has iterated and finished, or "mix"
#' to apply it after every stage 1
-#' @param ncores number of parallel processes; if NULL, use parallel::detectCores()
+#' @param ncores_tasks number of parallel tasks (1 to disable: sequential tasks)
+#' @param ncores_clust number of parallel clusterings in one task
#'
#' @return A data.frame of the final medoids curves (identifiers + values)
-epclust = function(data, K, nb_series_per_chunk, min_series_per_chunk=10*K,
- writeTmp=defaultWriteTmp, readTmp=defaultReadTmp, wf="haar", WER="end", ncores=NULL)
+#'
+#' @examples
+#' getData = function(start, n) {
+#' con = dbConnect(drv = RSQLite::SQLite(), dbname = "mydata.sqlite")
+#' df = dbGetQuery(con, paste(
+#' "SELECT * FROM times_values GROUP BY id OFFSET ",start,
+#' "LIMIT ", n, " ORDER BY date", sep=""))
+#' return (df)
+#' }
+#' cl = epclust(getData, K1=200, K2=15, ntasks=1000, nb_series_per_chunk=5000, WER="mix")
+#' @export
+epclust = function(data, K1, K2,
+ ntasks=1, nb_series_per_chunk=50*K1, min_series_per_chunk=5*K1,
+ writeTmp=defaultWriteTmp, readTmp=defaultReadTmp, wf="haar", WER="end",
+ ncores_tasks=1, ncores_clust=4)
{
#TODO: setRefClass(...) to avoid copy data:
#http://stackoverflow.com/questions/2603184/r-pass-by-reference
#2) process coeffs (by nb_series_per_chunk) and cluster them in parallel
library(parallel)
- ncores = ifelse(is.integer(ncores), ncores, parallel::detectCores()%/%4)
- cl = parallel::makeCluster(ncores)
- parallel::clusterExport(cl=cl, varlist=c("TODO:", "what", "to", "export?"), envir=environment())
+ cl_tasks = parallel::makeCluster(ncores_tasks)
+ #Nothing to export because each worker retrieve and put data from/on files (or DB)
+ #parallel::clusterExport(cl=cl, varlist=c("nothing","to","export"), envir=environment())
#TODO: be careful of writing to a new temp file, then flush initial one, then re-use it...
- repeat
- {
- #while there is jobs to do (i.e. size of tmp "file" is greater than nb_series_per_chunk)
- nb_workers = nb_curves %/% nb_series_per_chunk
- indices = list()
- #indices[[i]] == (start_index,number_of_elements)
- for (i in 1:nb_workers)
- indices[[i]] = c(nb_series_per_chunk*(i-1)+1, nb_series_per_chunk)
- remainder = nb_curves %% nb_series_per_chunk
- if (remainder >= min_series_per_chunk)
- {
- nb_workers = nb_workers + 1
- indices[[nb_workers]] = c(nb_curves-remainder+1, nb_curves)
- } else if (remainder > 0)
+ res_tasks = parallel::parSapply(cl_tasks, 1:ntasks, function() {
+ cl_clust = parallel::makeCluster(ncores_clust)
+ repeat
{
- #spread the load among other workers
- #...
+ #while there are jobs to do
+ #(i.e. size of tmp "file" is greater than ntasks * nb_series_per_chunk)
+ nb_workers = nb_curves %/% nb_series_per_chunk
+ indices = list()
+ #indices[[i]] == (start_index,number_of_elements)
+ for (i in 1:nb_workers)
+ indices[[i]] = c(nb_series_per_chunk*(i-1)+1, nb_series_per_chunk)
+ remainder = nb_curves %% nb_series_per_chunk
+ if (remainder >= min_series_per_chunk)
+ {
+ nb_workers = nb_workers + 1
+ indices[[nb_workers]] = c(nb_curves-remainder+1, nb_curves)
+ } else if (remainder > 0)
+ {
+ #spread the load among other workers
+ #...
+ }
+ res_clust = parallel::parSapply(cl, indices, processChunk, K, WER=="mix")
+ #C) flush tmp file (current parallel processes will write in it)
}
- li = parallel::parLapply(cl, indices, processChunk, K, WER=="mix")
- #C) flush tmp file (current parallel processes will write in it)
- }
- parallel::stopCluster(cl)
+ parallel:stopCluster(cl_clust)
+ })
+ parallel::stopCluster(cl_tasks)
#3) readTmp last results, apply PAM on it, and return medoids + identifiers
final_coeffs = readTmp(1, nb_series_per_chunk)
+library("Rwave")
+
#Entrée : courbes synchrones, soit après étape 1 itérée, soit après chaqure étape 1
+#TODO: bout de code qui calcule les courbes synchrones après étapes 1+2 à partir des ID médoïdes
#(Benjamin)
#à partir de là, "conso" == courbes synchrones
n <- nrow(conso)
delta <- ncol(conso)
-
#17000 colonnes coeff 1, puis 17000 coeff 2... [non : dans chaque tranche du cube]
-
-#TODO: une fonction qui fait lignes 59 à 91
-
-#cube:
-# Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1,
-# scalevector = scalevector4,
-# lt = delta, smooth = FALSE,
-# nvoice = nvoice) # observations node with CWT
-#
-# #matrix:
-# ############Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect)
-# #Xcwt2 <- matrix(NA_complex_, nrow= n, ncol= 2 + length((c(Xcwt4[,,1]))))
-#
# #NOTE: delta et lscvect pourraient etre gardés à part (communs)
-# for(i in 1:n)
-# Xcwt2[i,] <- c(delta, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) )
-#
-# #rm(conso, Xcwt4); gc()
-#
-# ## _.b WER^2 distances ########
-# Xwer_dist <- matrix(0.0, n, n)
-# for(i in 1:(n - 1)){
-# mat1 <- vect2mat(Xcwt2[i,])
-# for(j in (i + 1):n){
-# mat2 <- vect2mat(Xcwt2[j,])
-# num <- Mod(mat1 * Conj(mat2))
-# WX <- Mod(mat1 * Conj(mat1))
-# WY <- Mod(mat2 * Conj(mat2))
-# smsmnum <- smCWT(num, scalevector = scalevector4)
-# smsmWX <- smCWT(WX, scalevector = scalevector4)
-# smsmWY <- smCWT(WY, scalevector = scalevector4)
-# wer2 <- sum(colSums(smsmnum)^2) /
-# sum( sum(colSums(smsmWX) * colSums(smsmWY)) )
-# Xwer_dist[i, j] <- sqrt(delta * lscvect * (1 - wer2))
-# Xwer_dist[j, i] <- Xwer_dist[i, j]
-# }
-# }
-# diag(Xwer_dist) <- numeric(n)
-#
-# save(Xwer_dist, file = "../res/2009_synchros200WER.Rdata")
-# save(Xwer_dist, file = "../res/2009_synchros200-randomWER.Rdata")
-
-
#lignes 59 à 91 "dépliées" :
Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1,
scalevector = scalevector4,
lt = delta, smooth = FALSE,
nvoice = nvoice) # observations node with CWT
-
+
+#toCWT: (aux)
+##NOTE: renvoie une matrice 3D
+ toCWT <- function(X, sw= 0, tw= 0, swabs= 0,
+ nvoice= 12, noctave= 5,
+ s0= 2, w0= 2*pi, lt= 24, dt= 0.5,
+ spectra = FALSE, smooth = TRUE,
+ scaled = FALSE,
+ scalevector)
+ { noctave <- adjust.noctave(lt, dt, s0, tw, noctave)
+ if(missing(scalevector))
+ scalevector <- 2^(0:(noctave * nvoice) / nvoice) * s0
+ res <- lapply(1:nrow(X), function(n)
+ { tsX <- ts( X[n,] )
+ tsCent <- tsX - mean(tsX)
+ if(scaled) tsCent <- ts(scale(tsCent))
+ tsCent.cwt <- cwt.ts(tsCent, s0, noctave, nvoice, w0)
+ tsCent.cwt
+ } )
+ if( spectra ) res <- lapply(res, function(l) Mod(l)^2 )
+ if( smooth ) res <- lapply(res, smCWT, swabs = swabs,
+ tw = tw, dt = dt,
+ scalevector = scalevector)
+ resArray <- array(NA, c(nrow(res[[1]]), ncol(res[[1]]),
+ length(res)))
+ for( l in 1:length(res) ) resArray[ , , l] <- res[[l]]
+ resArray
+ }
+
+#from sowas
+cwt.ts <- function(ts,s0,noctave=5,nvoice=10,w0=2*pi){
+
+ if (class(ts)!="ts"){
+
+ cat("# This function needs a time series object as input. You may construct this by using the function ts(data,start,deltat). Try '?ts' for help.\n")
+
+ }
+ else{
+
+ t=time(ts)
+ dt=t[2]-t[1]
+
+ s0unit=s0/dt*w0/(2*pi)
+ s0log=as.integer((log2(s0unit)-1)*nvoice+1.5)
+
+ if (s0log<1){
+ cat(paste("# s0unit = ",s0unit,"\n",sep=""))
+ cat(paste("# s0log = ",s0log,"\n",sep=""))
+ cat("# s0 too small for w0! \n")
+ }
+ totnoct=noctave+as.integer(s0log/nvoice)+1
+
+ #cwt from package Rwave
+ totts.cwt=cwt(ts,totnoct,nvoice,w0,plot=0)
+
+ ts.cwt=totts.cwt[,s0log:(s0log+noctave*nvoice)]
+
+ #Normalization
+ sqs <- sqrt(2^(0:(noctave*nvoice)/nvoice)*s0)
+ smat <- matrix(rep(sqs,length(t)),nrow=length(t),byrow=TRUE)
+
+ ts.cwt*smat
+
+ }
+
+}
+
#matrix:
############Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect)
Xcwt2 <- matrix(NA_complex_, nrow= n, ncol= 2 + length((c(Xcwt4[,,1]))))
smsmwsp
}
- #dans sowas.R
+ #dans sowas.R (...donc on ne lisse pas à ce niveau ?)
smooth.matrix <- function(wt,swabs){
if (swabs != 0)
}
#et filter() est dans stats::
+> filter
+function (x, filter, method = c("convolution", "recursive"),
+ sides = 2L, circular = FALSE, init = NULL)
+{
+ method <- match.arg(method)
+ x <- as.ts(x)
+ storage.mode(x) <- "double"
+ xtsp <- tsp(x)
+ n <- as.integer(NROW(x))
+ if (is.na(n))
+ stop("invalid value of nrow(x)", domain = NA)
+ nser <- NCOL(x)
+ filter <- as.double(filter)
+ nfilt <- as.integer(length(filter))
+ if (is.na(n))
+ stop("invalid value of length(filter)", domain = NA)
+ if (anyNA(filter))
+ stop("missing values in 'filter'")
+ if (method == "convolution") {
+ if (nfilt > n)
+ stop("'filter' is longer than time series")
+ sides <- as.integer(sides)
+ if (is.na(sides) || (sides != 1L && sides != 2L))
+ stop("argument 'sides' must be 1 or 2")
+ circular <- as.logical(circular)
+ if (is.na(circular))
+ stop("'circular' must be logical and not NA")
+ if (is.matrix(x)) {
+ y <- matrix(NA, n, nser)
+ for (i in seq_len(nser)) y[, i] <- .Call(C_cfilter,
+ x[, i], filter, sides, circular)
+ }
+ else y <- .Call(C_cfilter, x, filter, sides, circular)
+ }
+ else {
+ if (missing(init)) {
+ init <- matrix(0, nfilt, nser)
+ }
+ else {
+ ni <- NROW(init)
+ if (ni != nfilt)
+ stop("length of 'init' must equal length of 'filter'")
+ if (NCOL(init) != 1L && NCOL(init) != nser) {
+ stop(sprintf(ngettext(nser, "'init' must have %d column",
+ "'init' must have 1 or %d columns", domain = "R-stats"),
+ nser), domain = NA)
+ }
+ if (!is.matrix(init))
+ dim(init) <- c(nfilt, nser)
+ }
+ ind <- seq_len(nfilt)
+ if (is.matrix(x)) {
+ y <- matrix(NA, n, nser)
+ for (i in seq_len(nser)) y[, i] <- .Call(C_rfilter,
+ x[, i], filter, c(rev(init[, i]), double(n)))[-ind]
+ }
+ else y <- .Call(C_rfilter, x, filter, c(rev(init[, 1L]),
+ double(n)))[-ind]
+ }
+ tsp(y) <- xtsp
+ class(y) <- if (nser > 1L)
+ c("mts", "ts")
+ else "ts"
+ y
+}
+<bytecode: 0x1b05db8>
+<environment: namespace:stats>
+
#cf. filters en C dans : https://svn.r-project.org/R/trunk/src/library/stats/src/filter.c
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <R.h>
+#include "ts.h"
+
+#ifndef min
+#define min(a, b) ((a < b)?(a):(b))
+#define max(a, b) ((a < b)?(b):(a))
+#endif
+// currently ISNAN includes NAs
+#define my_isok(x) (!ISNA(x) & !ISNAN(x))
+
+#Pour method=="convolution" dans filter() (fonction R)
+SEXP cfilter(SEXP sx, SEXP sfilter, SEXP ssides, SEXP scircular)
+{
+ if (TYPEOF(sx) != REALSXP || TYPEOF(sfilter) != REALSXP)
+ error("invalid input");
+ R_xlen_t nx = XLENGTH(sx), nf = XLENGTH(sfilter);
+ int sides = asInteger(ssides), circular = asLogical(scircular);
+ if(sides == NA_INTEGER || circular == NA_LOGICAL) error("invalid input");
+
+ SEXP ans = allocVector(REALSXP, nx);
+
+ R_xlen_t i, j, nshift;
+ double z, tmp, *x = REAL(sx), *filter = REAL(sfilter), *out = REAL(ans);
+
+ if(sides == 2) nshift = nf /2; else nshift = 0;
+ if(!circular) {
+ for(i = 0; i < nx; i++) {
+ z = 0;
+ if(i + nshift - (nf - 1) < 0 || i + nshift >= nx) {
+ out[i] = NA_REAL;
+ continue;
+ }
+ for(j = max(0, nshift + i - nx); j < min(nf, i + nshift + 1) ; j++) {
+ tmp = x[i + nshift - j];
+ if(my_isok(tmp)) z += filter[j] * tmp;
+ else { out[i] = NA_REAL; goto bad; }
+ }
+ out[i] = z;
+ bad:
+ continue;
+ }
+ } else { /* circular */
+ for(i = 0; i < nx; i++)
+ {
+ z = 0;
+ for(j = 0; j < nf; j++) {
+ R_xlen_t ii = i + nshift - j;
+ if(ii < 0) ii += nx;
+ if(ii >= nx) ii -= nx;
+ tmp = x[ii];
+ if(my_isok(tmp)) z += filter[j] * tmp;
+ else { out[i] = NA_REAL; goto bad2; }
+ }
+ out[i] = z;
+ bad2:
+ continue;
+ }
+ }
+ return ans;
+}