throw away old code, prepare tests

[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 06_predictions-ICAME.r
diff --git a/old_C_code/stage2_UNFINISHED/src/unused/06_predictions-ICAME.r b/old_C_code/stage2_UNFINISHED/src/unused/06_predictions-ICAME.r

deleted file mode 100644 (file)

index b7696a2..0000000
--- a/old_C_code/stage2_UNFINISHED/src/unused/06_predictions-ICAME.r
+++ /dev/null
@@ -1,160 +0,0 @@
-## File : 06_predictions-ICAME.r
-## Description : Prévoir les synchrones des classes ICAME. 
-
-
-rm(list = ls())
-
-if(Sys.info()[4] ==  "mojarrita"){ 
-  setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
-} else {
-  setwd("~/2014_EDF-Orsay-Lyon2/codes/")
-}
-
-## 1. Read auxiliar data files ####
-#identifiants <- read.table("identifs.txt")[ ,1]
-dates0       <- read.table("datesall.txt")[, 1]
-dates09      <- as.character(dates0[grep("2009", dates0)])
-dates10      <- as.character(dates0[grep("2010", dates0)])
-dates        <- c(dates09, dates10)
-rm(dates0, dates09, dates10)
-
-#n <- length(identifiants)
-p <- length(dates)
-
-# groups for the predictor
-gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
-i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
-i_end_calen   <- which(rownames(gr_calen) == "2010-12-31")
-gr <- gr_calen$gr[i_start_calen:i_end_calen]
-gr <- gsub("Thu", "TWT", gr)
-gr <- gsub("Wed", "TWT", gr)
-gr <- gsub("Tue", "TWT", gr)
-
-days <- seq.Date(from = as.Date("2009/01/01"),
-                 to   = as.Date("2010/12/31"),
-                 by   = 'day')
-
-
-setwd("../classes ICAME/synchrones_classif_ICAME/")
-files_ICAME <- dir(pattern = "*TR-32V*")
-K_ICAME  <- length(files_ICAME)
-SC       <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates 
-                                                           # (summer time saving)
-
-for(k in 1:K_ICAME){ 
-  df <- read.table(file = files_ICAME[k], header = TRUE)
-  SC[k, ] <- subset(x      = df,                    # series must be on rows
-                    subset = Year == "2009" | Year == "2010", 
-                    select = "Load")[[1]]
-}
-
-
-rm(df, files_ICAME, k)
-rm(gr_calen, i_start_calen, i_end_calen)
-
-# Set start and end timepoints for the prediction
-i_00      <- which(days == "2009-01-01")
-n_00      <- which(days == "2009-12-31") 
-n_01      <- which(days == "2010-12-31")
-
-
-## 2. Transform data into wkdata ####
-library(kerwavfun)
-
-mat_Load <- matrix(colSums(SC),   ncol = 48, byrow = TRUE)
-mat_ks   <- lapply(1:K_ICAME, function(k) matrix(data  = SC[k, ], 
-                                                 ncol  = 48, 
-                                                 byrow = TRUE))
-  
-wk_Load <- wkdata(X  = c(t(mat_Load)), 
-                  gr = gr[1:nrow(mat_Load)], 
-                  p  = 48)
-  
-wks <- lapply(mat_ks, function(mat_k) 
-       wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
-  
-## 3. Begin of big loop over grid ##
-grid <- n_00:(n_01 - 1) # these are the reference days
-#h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
-  
-# output matrix of predicted days 
-# names of the predicted days (reference days + 1)
-pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
-preds     <- lapply(1:K_ICAME, function(k) pred_Load)
-  
-go <- Sys.time()
-for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
-  if((q %/% 10) == (q / 10)) cat(paste("  iteration: ", q, "\n"))
-  Q  <- grid[q]
-    
-  select_Load <- select.wkdata(wk_Load, i_00:Q)
-  aux_Load  <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
-                         EPS = .Machine$double.eps) 
-  pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
-    
-  selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
-  auxs    <- lapply(selects, 
-                    function(k) wavkerfun(obj = k, 
-                                          kerneltype = "gaussian",
-                                          EPS = .Machine$double.eps))  
-  for(k in 1:K_ICAME) 
-    preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
-    
-}
-top <- Sys.time()
-#rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
-  
-
-preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} )
-# Compute the prediction of synchrone by the sum of individuals
-pred_Load_sum <- Reduce('+', preds) 
-  
-
-vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd))))
-names(vect_preds) <- paste0("icame", 1:K_ICAME)
-
-vect_Load <- c(t(pred_Load))
-vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load))
-
-
-for(tt in seq_along(vect_gtop)) {
-  VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs
-           tt < 17511  # these rows present a problem ==> why? (matbe too much 0s in preds_indiv)
-    if(VALID) { 
-    res <- gtop(preds_indiv   = as.numeric(vect_preds[tt, ]),
-                pred_total    = vect_Load[tt],
-                weights_indiv = rep(1, K_ICAME),
-                weight_total = 1,
-                bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ]))
-    vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum
-    }
-  }
-
-vect_gtop_sum <- rowSums(vect_gtop)
-pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE)
-
-## 4. Analisis of the predictions  ####
-resids_Load     <- pred_Load     - mat_Load[grid + 1, ] # i.e. reference days + 1
-resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
-resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ]
-
-mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
-rmse_Load <- sqrt(rowMeans(resids_Load^2))
-  
-mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
-rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
-
-mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ]))
-rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2))
-
-
-mean(mape_Load, na.rm = TRUE)
-mean(mape_Load_sum, na.rm = TRUE)
-mean(mape_gtop_sum, na.rm = TRUE)
-#print(mean(mape_Load, na.rm = TRUE))
-#print(mean(mape_Load_sum, na.rm = TRUE))
-  
-
-mean(rmse_Load, na.rm = TRUE)
-mean(rmse_Load_sum, na.rm = TRUE)
-mean(rmse_gtop_sum, na.rm = TRUE)