From dc1aa85a96bbf815b0d896c22a9b4a539a9e8a9c Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 30 Jan 2017 18:52:10 +0100 Subject: [PATCH] 'update' --- TODO | 18 +++ epclust/R/main.R | 4 +- epclust/R/stage2.R | 139 ++++++++++++++++++ .../src/05_cluster2stepWER.r | 8 +- 4 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 epclust/R/stage2.R diff --git a/TODO b/TODO index 7e6aa90..3c1fd78 100644 --- a/TODO +++ b/TODO @@ -48,3 +48,21 @@ utiliser du mixmod avec modèles allongés doit toutner sur machine plutôt standard, utilisateur "lambda" utiliser Rcpp ? +===== + +Strategies for upscaling +From 25K to 25M : in 1000 chunks of 25K +Reference values : + K0 = 200 super consumers (SC) + K* = 15 final clusters +1st strategy + Do 1000 times ONLY Energycon's 1st-step strategy on 25K clients + With the 1000 × K0 SC perform a 2-step run leading to K* clusters + +--> il faut s'arranger pour que + +2nd strategy + Do 1000 times Energycon's 2-step strategy on 25K clients leading to + 1000 × K* intermediate clusters + Treat the intermediate clusters as individual curves and perform a + single 2-step run to get K* final clusters diff --git a/epclust/R/main.R b/epclust/R/main.R index 6746d88..eded952 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -52,7 +52,7 @@ epclust = function(data, K, nb_series_per_chunk, min_series_per_chunk=10*K, stop("read/writeTmp should be functional (see defaults.R)") if (WER!="end" && WER!="mix") stop("WER takes values in {'end','mix'}") - #concerning ncores, any non-integer type will be treated as "use parallel:detectCores()" + #concerning ncores, any non-integer type will be treated as "use parallel:detectCores()/4" #1) acquire data (process curves, get as coeffs) #TODO: for data.frame and custom function, run in parallel (connections are sequential[?!]) @@ -98,7 +98,7 @@ epclust = function(data, K, nb_series_per_chunk, min_series_per_chunk=10*K, #2) process coeffs (by nb_series_per_chunk) and cluster them in parallel 
library(parallel) - ncores = ifelse(is.integer(ncores), ncores, parallel::detectCores()) + ncores = ifelse(is.integer(ncores), ncores, parallel::detectCores()%/%4) cl = parallel::makeCluster(ncores) parallel::clusterExport(cl=cl, varlist=c("TODO:", "what", "to", "export?"), envir=environment()) #TODO: be careful of writing to a new temp file, then flush initial one, then re-use it... diff --git a/epclust/R/stage2.R b/epclust/R/stage2.R new file mode 100644 index 0000000..f952da2 --- /dev/null +++ b/epclust/R/stage2.R @@ -0,0 +1,139 @@ +#Entrée : courbes synchrones, soit après étape 1 itérée, soit après chaqure étape 1 + +#(Benjamin) +#à partir de là, "conso" == courbes synchrones +n <- nrow(conso) +delta <- ncol(conso) + + +#17000 colonnes coeff 1, puis 17000 coeff 2... [non : dans chaque tranche du cube] + +#TODO: une fonction qui fait lignes 59 à 91 + +#cube: +# Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1, +# scalevector = scalevector4, +# lt = delta, smooth = FALSE, +# nvoice = nvoice) # observations node with CWT +# +# #matrix: +# ############Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect) +# #Xcwt2 <- matrix(NA_complex_, nrow= n, ncol= 2 + length((c(Xcwt4[,,1])))) +# +# #NOTE: delta et lscvect pourraient etre gardés à part (communs) +# for(i in 1:n) +# Xcwt2[i,] <- c(delta, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) ) +# +# #rm(conso, Xcwt4); gc() +# +# ## _.b WER^2 distances ######## +# Xwer_dist <- matrix(0.0, n, n) +# for(i in 1:(n - 1)){ +# mat1 <- vect2mat(Xcwt2[i,]) +# for(j in (i + 1):n){ +# mat2 <- vect2mat(Xcwt2[j,]) +# num <- Mod(mat1 * Conj(mat2)) +# WX <- Mod(mat1 * Conj(mat1)) +# WY <- Mod(mat2 * Conj(mat2)) +# smsmnum <- smCWT(num, scalevector = scalevector4) +# smsmWX <- smCWT(WX, scalevector = scalevector4) +# smsmWY <- smCWT(WY, scalevector = scalevector4) +# wer2 <- sum(colSums(smsmnum)^2) / +# sum( sum(colSums(smsmWX) * colSums(smsmWY)) ) +# Xwer_dist[i, j] <- sqrt(delta * lscvect * (1 - wer2)) +# Xwer_dist[j, i] <- 
Xwer_dist[i, j] +# } +# } +# diag(Xwer_dist) <- numeric(n) +# +# save(Xwer_dist, file = "../res/2009_synchros200WER.Rdata") +# save(Xwer_dist, file = "../res/2009_synchros200-randomWER.Rdata") + + + +#lignes 59 à 91 "dépliées" : +Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1, + scalevector = scalevector4, + lt = delta, smooth = FALSE, + nvoice = nvoice) # observations node with CWT + + #matrix: + ############Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect) + Xcwt2 <- matrix(NA_complex_, nrow= n, ncol= 2 + length((c(Xcwt4[,,1])))) + + #NOTE: delta et lscvect pourraient etre gardés à part (communs) + for(i in 1:n) + Xcwt2[i,] <- c(delta, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) ) + + #rm(conso, Xcwt4); gc() + + ## _.b WER^2 distances ######## + Xwer_dist <- matrix(0.0, n, n) + for(i in 1:(n - 1)){ + mat1 <- vect2mat(Xcwt2[i,]) + + #NOTE: vect2mat = as.matrix ?! (dans aux.R) + vect2mat <- function(vect){ + vect <- as.vector(vect) + matrix(vect[-(1:2)], delta, lscvect) + } + + for(j in (i + 1):n){ + mat2 <- vect2mat(Xcwt2[j,]) + num <- Mod(mat1 * Conj(mat2)) + WX <- Mod(mat1 * Conj(mat1)) + WY <- Mod(mat2 * Conj(mat2)) + smsmnum <- smCWT(num, scalevector = scalevector4) + smsmWX <- smCWT(WX, scalevector = scalevector4) + smsmWY <- smCWT(WY, scalevector = scalevector4) + wer2 <- sum(colSums(smsmnum)^2) / + sum( sum(colSums(smsmWX) * colSums(smsmWY)) ) + Xwer_dist[i, j] <- sqrt(delta * lscvect * (1 - wer2)) + Xwer_dist[j, i] <- Xwer_dist[i, j] + } + } + diag(Xwer_dist) <- numeric(n) + +#fonction smCWT (dans aux.R) + smCWT <- function(CWT, sw= 0, tw= 0, swabs= 0, + nvoice= 12, noctave= 2, s0= 2, w0= 2*pi, + lt= 24, dt= 0.5, scalevector ) + { +# noctave <- adjust.noctave(lt, dt, s0, tw, noctave) +# scalevector <- 2^(0:(noctave * nvoice) / nvoice) * s0 + wsp <- Mod(CWT) + smwsp <- smooth.matrix(wsp, swabs) + smsmwsp <- smooth.time(smwsp, tw, dt, scalevector) + smsmwsp + } + + #dans sowas.R +smooth.matrix <- function(wt,swabs){ + + if (swabs != 0) + smwt <- 
t(filter(t(wt),rep(1,2*swabs+1)/(2*swabs+1))) + else + smwt <- wt + + smwt + +} +smooth.time <- function(wt,tw,dt,scalevector){ + + smwt <- wt + + if (tw != 0){ + for (i in 1:length(scalevector)){ + + twi <- as.integer(scalevector[i]*tw/dt) + smwt[,i] <- filter(wt[,i],rep(1,2*twi+1)/(2*twi+1)) + + } + } + smwt +} + +#et filter() est dans stats:: + +#cf. filters en C dans : https://svn.r-project.org/R/trunk/src/library/stats/src/filter.c + diff --git a/old_C_code/stage2_UNFINISHED/src/05_cluster2stepWER.r b/old_C_code/stage2_UNFINISHED/src/05_cluster2stepWER.r index 69939b2..aa35d0c 100644 --- a/old_C_code/stage2_UNFINISHED/src/05_cluster2stepWER.r +++ b/old_C_code/stage2_UNFINISHED/src/05_cluster2stepWER.r @@ -45,13 +45,19 @@ rm(synchros09, nas) nvoice <- 4 # # noctave4 = 2^13 = 8192 half hours ~ 180 days noctave4 <- adjust.noctave(N = delta, dt = 1, s0 = 2, -# tw = 0, noctave = 13) + tw = 0, noctave = 13) # # 4 here represent 2^5 = 32 half-hours ~ 1 day scalevector4 <- 2^(4:(noctave4 * nvoice) / nvoice) * 2 lscvect4 <- length(scalevector4) lscvect <- lscvect4 # i should clean my code: werFam demands a lscvect +#(Benjamin) +#à partir de là, "conso" == courbes synchrones +n <- nrow(conso) +delta <- ncol(conso) + + #17000 colonnes coeff 1, puis 17000 coeff 2... [non : dans chaque tranche du cube] #TODO: une fonction qui fait lignes 59 à 91 -- 2.44.0