old_C_code/stage2_UNFINISHED/src/unused/06_predictions-par.r

   1 ## File : 06_predictions.r
   2 ## Description : Calculer les synchrones pour chaque groupe
   3 ##               obtenu par le clustering.
   4
   5 rm(list = ls())
   6
   7 if(Sys.info()[4] ==  "mojarrita"){
   8   setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
   9 } else {
  10   setwd("~/2014_EDF-Orsay-Lyon2/codes/")
  11 }
  12
  13 ## 1. Read auxiliar data files ####
  14
  15 identifiants <- read.table("identifs.txt")[ ,1]
  16 dates0       <- read.table("datesall.txt")[, 1]
  17 dates09      <- as.character(dates0[grep("2009", dates0)])
  18 dates10      <- as.character(dates0[grep("2010", dates0)])
  19 dates        <- c(dates09, dates10)
  20 rm(dates0, dates09, dates10)
  21
  22 n <- length(identifiants)
  23 p <- length(dates)
  24
  25 # groups for the predictor
  26 gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
  27 i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
  28 i_end_calen   <- which(rownames(gr_calen) == "2010-12-31")
  29 gr <- gr_calen$gr[i_start_calen:i_end_calen]
  30 gr <- gsub("Thu", "TWT", gr)
  31 gr <- gsub("Wed", "TWT", gr)
  32 gr <- gsub("Tue", "TWT", gr)
  33
  34 days <- seq.Date(from = as.Date("2009/01/01"),
  35                  to   = as.Date("2010/12/31"),
  36                  by   = 'day')
  37
  38 #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt"))
  39 #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt"))
  40 synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt"))
  41 synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200RC.txt"))
  42
  43 synchros09 <- t(synchros09) # series must be on rows
  44 synchros10 <- t(synchros10) # series must be on rows
  45
  46 ## 2. Imputation of missing values ####
  47 #     (done by linear interpolation)
  48 nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing
  49 synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4])
  50
  51 imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
  52 imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
  53
  54 synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518])
  55 synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518])
  56
  57
  58 rm(imput09, imput10, nas)
  59 rm(gr_calen, i_start_calen, i_end_calen)
  60
  61 # Set start and end timepoints for the prediction
  62 i_00      <- which(days == "2009-01-01")
  63 n_00      <- which(days == "2009-12-31")
  64 n_01      <- which(days == "2010-12-31")
  65
  66
  67 ## 3. Construct predictors ####
  68 library(kerwavfun)
  69 synchros <- (cbind( synchros09, synchros10 ))[-201, ]
  70
  71 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorAC')
  72 #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRC')
  73 clustfactors <- dir("~/tmp/", pattern = 'clustfactorWER')
  74
  75 maxK <- length(clustfactors) - 1
  76
  77 performK <- data.frame(K    = rep(NA_real_, maxK),
  78                        mape = rep(NA_real_, maxK),
  79                        rmse = rep(NA_real_, maxK))
  80
  81 setwd("~/tmp/")
  82 for(cf in seq_along(clustfactors)) {
  83   #clustfactor <- read.table(file = "~/tmp/clustfactor3.txt")
  84   clustfactor <- read.table(file = clustfactors[cf])
  85
  86   K <- nrow(unique(clustfactor))
  87
  88   performK[cf, 1] <- K
  89
  90   SC <- matrix(0, ncol = ncol(synchros), nrow = K)
  91   for(k in 1:K){
  92     clustk <- which(clustfactor == k)
  93     if(length(clustk) > 0) {
  94       if(length(clustk) > 1) {
  95         SCk <- colSums(synchros[which(clustfactor == k), ])
  96       } else {
  97         SCk <- synchros[which(clustfactor == k), ]
  98       }
  99       SC[k, ] <- SC[k, ] + SCk
 100       rm(SCk)
 101     }
 102   }
 103
 104   mat_Load <- matrix(colSums(SC),   ncol = 48, byrow = TRUE)
 105   mat_ks   <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE))
 106
 107
 108   ## 2. Transform to wkdata (performs DWT) ##
 109
 110   wk_Load <- wkdata(X  = c(t(mat_Load)),
 111                     gr = gr[1:nrow(mat_Load)],
 112                     p  = 48)
 113
 114   wks <- lapply(mat_ks, function(mat_k)
 115           wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
 116
 117   ## 3. Begin of big loop over ##
 118   grid <- n_00:(n_01 - 1) # these are the reference days
 119   #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
 120
 121   # output matrix of predicted days
 122   # names of the predicted days (reference days + 1)
 123   pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
 124   preds     <- lapply(1:K, function(k) pred_Load)
 125
 126   go <- Sys.time()
 127   for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
 128     if((q %/% 10) == (q / 10)) cat(paste("  iteration: ", q, "\n"))
 129     Q  <- grid[q]
 130
 131     select_Load <- select.wkdata(wk_Load, i_00:Q)
 132     aux_Load  <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
 133                            EPS = .Machine$double.eps)
 134     pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
 135
 136     selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
 137     auxs    <- lapply(selects,
 138                       function(k) wavkerfun(obj = k,
 139                                           kerneltype = "gaussian",
 140                                           EPS = .Machine$double.eps))
 141     for(k in 1:K)
 142       preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
 143
 144   }
 145
 146   top <- Sys.time()
 147   #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
 148
 149   # Compute the prediction of synchrone by the sum of individuals
 150   pred_Load_sum <- Reduce('+', preds)
 151
 152   ## 4. Analisis of the predictions  ####
 153   resids_Load     <- pred_Load     - mat_Load[grid + 1, ] # i.e. reference days + 1
 154   resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
 155
 156   mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
 157   rmse_Load <- sqrt(rowMeans(resids_Load^2))
 158
 159   mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
 160   rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
 161
 162   performK[cf, 2] <- mean(mape_Load, na.rm = TRUE)
 163   performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE)
 164 #print(mean(mape_Load, na.rm = TRUE))
 165 #print(mean(mape_Load_sum, na.rm = TRUE))
 166
 167 }
 168
 169
 170 performKwer <- performK
 171
 172 #performKrc <- read.table("~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt")
 173 #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt',
 174 #            performK)
 175 write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200WER_3.txt',
 176             performK)
 177
 178 #performK <- data.frame(K   = performKrc$K,
 179 #                       rc  = performKrc$rmse,
 180 #                       wer = performKwer$rmse)
 181
 182
 183 performK <- performK[order(performK$K), ]
 184
 185
 186 #plot(performK$K, performK$rc, type = 'l', ylim = c(1.7,2.2),
 187 #     lwd = 2, col = 2)
 188 #lines(performK$K, performK$wer, col = 3, lwd = 2)
 189 #abline(h = mean(performKrc$mape), lwd = 2)
 190 #legend("topright", c('Sync 32K', 'Cl. RC', "Cl. WER"), lwd = 2, col = 1:3)