From 572d139adaf3ca05e1c25ad29a71d3b38f0bcef8 Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Thu, 17 Nov 2016 16:56:55 +0100
Subject: [PATCH] add some comments after meeting 17-11-2016

---
 TODO                                             | 13 +++++++++++++
 code/stage2/src/02_cluster_2009.r                |  5 ++++-
 .../stage2/src/03_compute-sums-of-classes_2009.r |  2 ++
 code/stage2/src/05_cluster2stepWER.r             | 16 ++++++++++++++--
 code/stage2/src/aux.r                            |  1 +
 5 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 TODO

diff --git a/TODO b/TODO
new file mode 100644
index 0000000..4aa334d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,13 @@
+simulateur : complètement aléatoire ? from Irish dataset ?
+wavelets methods in statistics with R - p180
+
+00-convertir2009 et 2010.R
+01-extractFeatures.R pour 2009 [utilise Stbr.R] (car on prédit 2010, pas besoin)
+02-cluster2009.R
+03-compute-sum-of-classes2009.R
+05-cluster2WER-2009.R
+06-prediction.R
+
+Essayer distance wdist du package biwavelet ?
+
+geometric structure of high dim data and dim reduction 2011
diff --git a/code/stage2/src/02_cluster_2009.r b/code/stage2/src/02_cluster_2009.r
index f08d82b..3514ce7 100644
--- a/code/stage2/src/02_cluster_2009.r
+++ b/code/stage2/src/02_cluster_2009.r
@@ -24,7 +24,7 @@ sdcontrib <- apply(matcontrib0, 1, sd)
 lims      <- quantile(sdcontrib, probs = c(.005, .995)) # obtain 1%-extreme data
 is_normal <- which((sdcontrib > lims[1]) & (sdcontrib < lims[2]))
 
-matcontri_ext <- matcontrib0[-is_normal, ]""
+matcontri_ext <- matcontrib0[-is_normal, ]#""
 matcontrib    <- matcontrib0[is_normal, ]     # wipe out aberrant data
 
 matcontrib <- t(apply(matcontrib, 1, function(x) x / sum(x)))
@@ -39,6 +39,9 @@ selvar <- ci$selectv
 ## c. Clustering  ##########
 K <- 200
 system.time(
+
+#TODO: cette partie en C
+
   clfit <- clara(x        = tdata[, selvar], 
                  k        = K, 
                  sampsize = 4000, 
diff --git a/code/stage2/src/03_compute-sums-of-classes_2009.r b/code/stage2/src/03_compute-sums-of-classes_2009.r
index 0623ba1..20d2d18 100644
--- a/code/stage2/src/03_compute-sums-of-classes_2009.r
+++ b/code/stage2/src/03_compute-sums-of-classes_2009.r
@@ -54,6 +54,8 @@ for(b in seq_along(blocks)){        # Reading loop
   
 #  if(length(which(rownames(datamat) == "218095"))>0) print(b);
   
+
+  #TODO: en C : lourd
   for(k in 1:K){ 
     clustk <- which(clustfactor == k)
     if(length(clustk) > 0) {
diff --git a/code/stage2/src/05_cluster2stepWER.r b/code/stage2/src/05_cluster2stepWER.r
index dcc7fa3..9b55da1 100644
--- a/code/stage2/src/05_cluster2stepWER.r
+++ b/code/stage2/src/05_cluster2stepWER.r
@@ -9,6 +9,8 @@ library(Rwave)       # CWT
 library(cluster)     # pam
 #library(flexclust)   # kcca
 source("aux.r")               # auxiliary clustering functions 
+
+#TODO: [plus tard] alternative à sowas (package disparu) : cwt..
 source("sowas-superseded.r")  # auxiliary CWT functions
 
 ## 1. Read auxiliar data files ####
@@ -25,8 +27,9 @@ synchros09 <- t(as.matrix(read.table("~/tmp/2009_synchros200RC.txt")))
 #synchros09 <- t(as.matrix(read.table("~/tmp/2009_synchros200-random.txt")))
 
 nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing
-synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4])
+synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4]) #valeurs après 1er janvier
 
+#moyenne pondérée pour compléter deux demi-heures manquantes
 imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
 synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518])
 
@@ -47,14 +50,23 @@ rm(synchros09, nas)
 # scalevector4  <- 2^(4:(noctave4 * nvoice) / nvoice) * 2
 # lscvect4      <- length(scalevector4)
 # lscvect <- lscvect4  # i should clean my code: werFam demands a lscvect
+
+
+#17000 colonnes coeff 1, puis 17000 coeff 2... [non : dans chaque tranche du cube]
+
+#TODO: une fonction qui fait les lignes 59 à 91
+
+#cube:
 # Xcwt4   <- toCWT(conso, noctave = noctave4, dt = 1,
 #                 scalevector = scalevector4,
 #                 lt = delta, smooth = FALSE, 
 #                 nvoice = nvoice)      # observations node with CWT
 # 
-# #Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect)
+# #matrix:
+# ############Xcwt2 <- matrix(0.0, nrow= n, ncol= 2 + delta * lscvect)
 # #Xcwt2 <- matrix(NA_complex_, nrow= n, ncol= 2 + length((c(Xcwt4[,,1]))))
 # 
+# #NOTE: delta et lscvect pourraient être gardés à part (communs)
 # for(i in 1:n) 
 #  Xcwt2[i,] <- c(delta, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) ) 
 # 
diff --git a/code/stage2/src/aux.r b/code/stage2/src/aux.r
index bb9077b..15021c0 100644
--- a/code/stage2/src/aux.r
+++ b/code/stage2/src/aux.r
@@ -18,6 +18,7 @@
   ########################################################
 
 
+##NOTE: renvoie un tableau 3D (array)
   toCWT  <- function(X, sw=  0,  tw=  0, swabs= 0,
                        nvoice= 12, noctave= 5, 
                        s0= 2, w0= 2*pi, lt= 24, dt= 0.5,
-- 
2.44.0