old_C_code/stage2_UNFINISHED/src/06_predictions.r

   1 ## File : 06_predictions.r
   2 ## Description : Calculer les synchrones pour chaque groupe
   3 ##               obtenu par le clustering.
   4
   5 rm(list = ls())
   6
   7 setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
   8
   9
  10 ## 1. Read auxiliar data files ####
  11
  12 identifiants <- read.table("identifs.txt")[ ,1]
  13 dates0       <- read.table("datesall.txt")[, 1]
  14 dates09      <- as.character(dates0[grep("2009", dates0)])
  15 dates10      <- as.character(dates0[grep("2010", dates0)])
  16 dates        <- c(dates09, dates10)
  17 rm(dates0, dates09, dates10)
  18
  19 n <- length(identifiants)
  20 p <- length(dates)
  21
  22 # groups for the predictor
  23 gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
  24 i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
  25 i_end_calen   <- which(rownames(gr_calen) == "2010-12-31")
  26 gr <- gr_calen$gr[i_start_calen:i_end_calen]
  27 gr <- gsub("Thu", "TWT", gr)
  28 gr <- gsub("Wed", "TWT", gr)
  29 gr <- gsub("Tue", "TWT", gr)
  30
  31 days <- seq.Date(from = as.Date("2009/01/01"),
  32                  to   = as.Date("2010/12/31"),
  33                  by   = 'day')
  34
  35 #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt"))
  36 #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt"))
  37 #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt"))
  38 #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200RC.txt"))
  39 synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200-random.txt"))
  40 synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200-random.txt"))
  41
  42
  43 synchros09 <- t(synchros09) # series must be on rows
  44 synchros10 <- t(synchros10) # series must be on rows
  45
  46 ## 2. Imputation of missing values ####
  47 #     (done by linear interpolation)
  48 nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing
  49 synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4])
  50
  51 imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
  52 imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
  53
  54 synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518])
  55 synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518])
  56
  57
  58 rm(imput09, imput10, nas)
  59 rm(gr_calen, i_start_calen, i_end_calen)
  60
  61 # Set start and end timepoints for the prediction
  62 i_00      <- which(days == "2009-01-01")
  63 n_00      <- which(days == "2009-12-31")
  64 n_01      <- which(days == "2010-12-31")
  65
  66
  67 ## 3. Construct predictors ####
  68 library(kerwavfun)
  69 synchros <- (cbind( synchros09, synchros10 ))[-201, ]
  70
  71 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorAC')
  72 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRC')
  73 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorWER')
  74 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRANDOM')
  75 clustfactors <- dir("~/tmp/", pattern = 'clustfactor-randomWER')
  76
  77 maxK <- length(clustfactors) - 1
  78
  79 performK <- data.frame(K    = rep(NA_real_, maxK),
  80                        mape = rep(NA_real_, maxK),
  81                        rmse = rep(NA_real_, maxK))
  82
  83 setwd("~/tmp/")
  84 for(cf in seq_along(clustfactors)) {
  85 #  clustfactor <- read.table(file = "~/tmp/clustfactor3.txt")
  86   clustfactor <- read.table(file = clustfactors[cf])
  87
  88   K <- nrow(unique(clustfactor))
  89
  90   performK[cf, 1] <- K
  91
  92   SC <- matrix(0, ncol = ncol(synchros), nrow = K)
  93   for(k in 1:K){
  94     clustk <- which(clustfactor == k)
  95     if(length(clustk) > 0) {
  96       if(length(clustk) > 1) {
  97         SCk <- colSums(synchros[which(clustfactor == k), ])
  98       } else {
  99         SCk <- synchros[which(clustfactor == k), ]
 100       }
 101       SC[k, ] <- SC[k, ] + SCk
 102       rm(SCk)
 103     }
 104   }
 105
 106   mat_Load <- matrix(colSums(SC),   ncol = 48, byrow = TRUE)
 107   mat_ks   <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE))
 108
 109
 110   ## 2. Transform to wkdata (performs DWT) ##
 111
 112   wk_Load <- wkdata(X  = c(t(mat_Load)),
 113                     gr = gr[1:nrow(mat_Load)],
 114                     p  = 48)
 115
 116   wks <- lapply(mat_ks, function(mat_k)
 117           wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
 118
 119   ## 3. Begin of big loop over ##
 120   grid <- n_00:(n_01 - 1) # these are the reference days
 121   #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
 122
 123   # output matrix of predicted days
 124   # names of the predicted days (reference days + 1)
 125   pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
 126   preds     <- lapply(1:K, function(k) pred_Load)
 127
 128   go <- Sys.time()
 129   for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
 130     if((q %/% 50) == (q / 50)) cat(paste("  iteration: ", q, "\n"))
 131     Q  <- grid[q]
 132
 133     select_Load <- select.wkdata(wk_Load, i_00:Q)
 134     aux_Load  <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
 135                            EPS = .Machine$double.eps)
 136     pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
 137
 138     selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
 139     auxs    <- lapply(selects,
 140                       function(k) wavkerfun(obj = k,
 141                                           kerneltype = "gaussian",
 142                                           EPS = .Machine$double.eps))
 143     for(k in 1:K)
 144       preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
 145
 146   }
 147
 148   top <- Sys.time()
 149   #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
 150
 151   # Compute the prediction of synchrone by the sum of individuals
 152   pred_Load_sum <- Reduce('+', preds)
 153
 154   ## 4. Analisis of the predictions  ####
 155   resids_Load     <- pred_Load     - mat_Load[grid + 1, ] # i.e. reference days + 1
 156   resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
 157
 158   mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
 159   rmse_Load <- sqrt(rowMeans(resids_Load^2))
 160
 161   mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
 162   rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
 163
 164   performK[cf, 2] <- mean(mape_Load, na.rm = TRUE)
 165   performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE)
 166 #print(mean(mape_Load, na.rm = TRUE))
 167 #print(mean(mape_Load_sum, na.rm = TRUE))
 168
 169 }
 170
 171
 172 #performKrc <- read.table("../res/perform200RC.txt")
 173 #write.table(file = '~/ownCloud/projects/2014_EDF-Orsay-Lyon2/res/perform200-random.txt',
 174 #            performK)
 175 #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt',
 176 #            performK)
 177 #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200WER_3.txt',
 178 #            performK)
 179
 180 #performK <- data.frame(K   = performKrc$K,
 181 #                       rc  = performKrc$rmse,
 182 #                       wer = performKwer$rmse)
 183
 184 #perform <- rbind(performK, performKwer)
 185 #perform <- perform[order(perform$K), ]
 186
 187 plot(perform$K, perform$rmse, type = 'l', ylim = c(1.7,2.2),
 188      lwd = 2, col = 2, xlab = "Nb Cluster", ylab = "MAPE (en  %)")
 189 lines(performK$K, performKrc$rmse, col = 3, lwd = 2)
 190 abline(h = mean(performK$mape), lwd = 2)
 191 legend("topright", c('AGG', 'DIS euclid', "DIS WER"), lwd = 2, col = 1:3)
 192
 193
 194 ## Plot article ENERGYCON
 195 performKwer <- read.table("../res/perform200WER.txt")
 196 performKwer <- performKwer[order(performKwer$K), ]
 197
 198 pdf("~/perf_wer.pdf")
 199 plot(performKwer$K, performKwer$rmse, type = 'l',
 200      ylim = c(1.7,2.2), lwd = 2, col = 2,
 201      xlab = "Nb Cluster", ylab = "MAPE (en  %)")
 202 abline(h = mean(performKwer$mape), lwd = 2)
 203 legend("topright", c('AGG', "DIS WER"), lwd = 2, col = 1:2)
 204 dev.off()