## File : 06_predictions-ICAME.r ## Description : Prévoir les synchrones des classes ICAME. rm(list = ls()) if(Sys.info()[4] == "mojarrita"){ setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") } else { setwd("~/2014_EDF-Orsay-Lyon2/codes/") } ## 1. Read auxiliar data files #### #identifiants <- read.table("identifs.txt")[ ,1] dates0 <- read.table("datesall.txt")[, 1] dates09 <- as.character(dates0[grep("2009", dates0)]) dates10 <- as.character(dates0[grep("2010", dates0)]) dates <- c(dates09, dates10) rm(dates0, dates09, dates10) #n <- length(identifiants) p <- length(dates) # groups for the predictor gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") i_start_calen <- which(rownames(gr_calen) == "2009-01-01") i_end_calen <- which(rownames(gr_calen) == "2010-12-31") gr <- gr_calen$gr[i_start_calen:i_end_calen] gr <- gsub("Thu", "TWT", gr) gr <- gsub("Wed", "TWT", gr) gr <- gsub("Tue", "TWT", gr) days <- seq.Date(from = as.Date("2009/01/01"), to = as.Date("2010/12/31"), by = 'day') setwd("../classes ICAME/synchrones_classif_ICAME/") files_ICAME <- dir(pattern = "*TR-32V*") K_ICAME <- length(files_ICAME) SC <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates # (summer time saving) for(k in 1:K_ICAME){ df <- read.table(file = files_ICAME[k], header = TRUE) SC[k, ] <- subset(x = df, # series must be on rows subset = Year == "2009" | Year == "2010", select = "Load")[[1]] } rm(df, files_ICAME, k) rm(gr_calen, i_start_calen, i_end_calen) # Set start and end timepoints for the prediction i_00 <- which(days == "2009-01-01") n_00 <- which(days == "2009-12-31") n_01 <- which(days == "2010-12-31") ## 2. Transform data into wkdata #### library(kerwavfun) mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) mat_ks <- lapply(1:K_ICAME, function(k) matrix(data = SC[k, ], ncol = 48, byrow = TRUE)) wk_Load <- wkdata(X = c(t(mat_Load)), gr = gr[1:nrow(mat_Load)], p = 48) wks <- lapply(mat_ks, function(mat_k) wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) ## 3. Begin of big loop over grid ## grid <- n_00:(n_01 - 1) # these are the reference days #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) # output matrix of predicted days # names of the predicted days (reference days + 1) pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) preds <- lapply(1:K_ICAME, function(k) pred_Load) go <- Sys.time() for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n")) Q <- grid[q] select_Load <- select.wkdata(wk_Load, i_00:Q) aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", EPS = .Machine$double.eps) pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) auxs <- lapply(selects, function(k) wavkerfun(obj = k, kerneltype = "gaussian", EPS = .Machine$double.eps)) for(k in 1:K_ICAME) preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X } top <- Sys.time() #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} ) # Compute the prediction of synchrone by the sum of individuals pred_Load_sum <- Reduce('+', preds) vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd)))) names(vect_preds) <- paste0("icame", 1:K_ICAME) vect_Load <- c(t(pred_Load)) vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load)) for(tt in seq_along(vect_gtop)) { VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs tt < 17511 # these rows present a problem ==> why? (matbe too much 0s in preds_indiv) if(VALID) { res <- gtop(preds_indiv = as.numeric(vect_preds[tt, ]), pred_total = vect_Load[tt], weights_indiv = rep(1, K_ICAME), weight_total = 1, bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ])) vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum } } vect_gtop_sum <- rowSums(vect_gtop) pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE) ## 4. Analisis of the predictions #### resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ] mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) rmse_Load <- sqrt(rowMeans(resids_Load^2)) mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ])) rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2)) mean(mape_Load, na.rm = TRUE) mean(mape_Load_sum, na.rm = TRUE) mean(mape_gtop_sum, na.rm = TRUE) #print(mean(mape_Load, na.rm = TRUE)) #print(mean(mape_Load_sum, na.rm = TRUE)) mean(rmse_Load, na.rm = TRUE) mean(rmse_Load_sum, na.rm = TRUE) mean(rmse_gtop_sum, na.rm = TRUE)