| 1 | ## File : 06_predictions-ICAME.r |
| 2 | ## Description : Prévoir les synchrones des classes ICAME. |
| 3 | |
| 4 | |
| 5 | rm(list = ls()) |
| 6 | |
| 7 | if(Sys.info()[4] == "mojarrita"){ |
| 8 | setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") |
| 9 | } else { |
| 10 | setwd("~/2014_EDF-Orsay-Lyon2/codes/") |
| 11 | } |
| 12 | |
| 13 | ## 1. Read auxiliar data files #### |
| 14 | #identifiants <- read.table("identifs.txt")[ ,1] |
| 15 | dates0 <- read.table("datesall.txt")[, 1] |
| 16 | dates09 <- as.character(dates0[grep("2009", dates0)]) |
| 17 | dates10 <- as.character(dates0[grep("2010", dates0)]) |
| 18 | dates <- c(dates09, dates10) |
| 19 | rm(dates0, dates09, dates10) |
| 20 | |
| 21 | #n <- length(identifiants) |
| 22 | p <- length(dates) |
| 23 | |
| 24 | # groups for the predictor |
| 25 | gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") |
| 26 | i_start_calen <- which(rownames(gr_calen) == "2009-01-01") |
| 27 | i_end_calen <- which(rownames(gr_calen) == "2010-12-31") |
| 28 | gr <- gr_calen$gr[i_start_calen:i_end_calen] |
| 29 | gr <- gsub("Thu", "TWT", gr) |
| 30 | gr <- gsub("Wed", "TWT", gr) |
| 31 | gr <- gsub("Tue", "TWT", gr) |
| 32 | |
| 33 | days <- seq.Date(from = as.Date("2009/01/01"), |
| 34 | to = as.Date("2010/12/31"), |
| 35 | by = 'day') |
| 36 | |
| 37 | |
| 38 | setwd("../classes ICAME/synchrones_classif_ICAME/") |
| 39 | files_ICAME <- dir(pattern = "*TR-32V*") |
| 40 | K_ICAME <- length(files_ICAME) |
| 41 | SC <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates |
| 42 | # (summer time saving) |
| 43 | |
| 44 | for(k in 1:K_ICAME){ |
| 45 | df <- read.table(file = files_ICAME[k], header = TRUE) |
| 46 | SC[k, ] <- subset(x = df, # series must be on rows |
| 47 | subset = Year == "2009" | Year == "2010", |
| 48 | select = "Load")[[1]] |
| 49 | } |
| 50 | |
| 51 | |
| 52 | rm(df, files_ICAME, k) |
| 53 | rm(gr_calen, i_start_calen, i_end_calen) |
| 54 | |
| 55 | # Set start and end timepoints for the prediction |
| 56 | i_00 <- which(days == "2009-01-01") |
| 57 | n_00 <- which(days == "2009-12-31") |
| 58 | n_01 <- which(days == "2010-12-31") |
| 59 | |
| 60 | |
| 61 | ## 2. Transform data into wkdata #### |
| 62 | library(kerwavfun) |
| 63 | |
| 64 | mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) |
| 65 | mat_ks <- lapply(1:K_ICAME, function(k) matrix(data = SC[k, ], |
| 66 | ncol = 48, |
| 67 | byrow = TRUE)) |
| 68 | |
| 69 | wk_Load <- wkdata(X = c(t(mat_Load)), |
| 70 | gr = gr[1:nrow(mat_Load)], |
| 71 | p = 48) |
| 72 | |
| 73 | wks <- lapply(mat_ks, function(mat_k) |
| 74 | wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) |
| 75 | |
| 76 | ## 3. Begin of big loop over grid ## |
| 77 | grid <- n_00:(n_01 - 1) # these are the reference days |
| 78 | #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) |
| 79 | |
| 80 | # output matrix of predicted days |
| 81 | # names of the predicted days (reference days + 1) |
| 82 | pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) |
| 83 | preds <- lapply(1:K_ICAME, function(k) pred_Load) |
| 84 | |
| 85 | go <- Sys.time() |
| 86 | for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] |
| 87 | if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n")) |
| 88 | Q <- grid[q] |
| 89 | |
| 90 | select_Load <- select.wkdata(wk_Load, i_00:Q) |
| 91 | aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", |
| 92 | EPS = .Machine$double.eps) |
| 93 | pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X |
| 94 | |
| 95 | selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) |
| 96 | auxs <- lapply(selects, |
| 97 | function(k) wavkerfun(obj = k, |
| 98 | kerneltype = "gaussian", |
| 99 | EPS = .Machine$double.eps)) |
| 100 | for(k in 1:K_ICAME) |
| 101 | preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X |
| 102 | |
| 103 | } |
| 104 | top <- Sys.time() |
| 105 | #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) |
| 106 | |
| 107 | |
| 108 | preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} ) |
| 109 | # Compute the prediction of synchrone by the sum of individuals |
| 110 | pred_Load_sum <- Reduce('+', preds) |
| 111 | |
| 112 | |
| 113 | vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd)))) |
| 114 | names(vect_preds) <- paste0("icame", 1:K_ICAME) |
| 115 | |
| 116 | vect_Load <- c(t(pred_Load)) |
| 117 | vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load)) |
| 118 | |
| 119 | |
| 120 | for(tt in seq_along(vect_gtop)) { |
| 121 | VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs |
| 122 | tt < 17511 # these rows present a problem ==> why? (matbe too much 0s in preds_indiv) |
| 123 | if(VALID) { |
| 124 | res <- gtop(preds_indiv = as.numeric(vect_preds[tt, ]), |
| 125 | pred_total = vect_Load[tt], |
| 126 | weights_indiv = rep(1, K_ICAME), |
| 127 | weight_total = 1, |
| 128 | bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ])) |
| 129 | vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | vect_gtop_sum <- rowSums(vect_gtop) |
| 134 | pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE) |
| 135 | |
| 136 | ## 4. Analisis of the predictions #### |
| 137 | resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 |
| 138 | resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 |
| 139 | resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ] |
| 140 | |
| 141 | mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) |
| 142 | rmse_Load <- sqrt(rowMeans(resids_Load^2)) |
| 143 | |
| 144 | mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) |
| 145 | rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) |
| 146 | |
| 147 | mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ])) |
| 148 | rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2)) |
| 149 | |
| 150 | |
| 151 | mean(mape_Load, na.rm = TRUE) |
| 152 | mean(mape_Load_sum, na.rm = TRUE) |
| 153 | mean(mape_gtop_sum, na.rm = TRUE) |
| 154 | #print(mean(mape_Load, na.rm = TRUE)) |
| 155 | #print(mean(mape_Load_sum, na.rm = TRUE)) |
| 156 | |
| 157 | |
| 158 | mean(rmse_Load, na.rm = TRUE) |
| 159 | mean(rmse_Load_sum, na.rm = TRUE) |
| 160 | mean(rmse_gtop_sum, na.rm = TRUE) |