X-Git-Url: https://git.auder.net/?a=blobdiff_plain;ds=sidebyside;f=code%2Fstage2%2Fsrc%2Funused%2F04_predictions.r;fp=code%2Fstage2%2Fsrc%2Funused%2F04_predictions.r;h=0000000000000000000000000000000000000000;hb=7709d507dfab9256a401f2c77ced7bc70d90fec3;hp=010880e69dcf7467d0d69dd605ba7c4e60feb933;hpb=38aef1cbef037257bf03dd1e65fbb682a32e3f2c;p=epclust.git diff --git a/code/stage2/src/unused/04_predictions.r b/code/stage2/src/unused/04_predictions.r deleted file mode 100644 index 010880e..0000000 --- a/code/stage2/src/unused/04_predictions.r +++ /dev/null @@ -1,159 +0,0 @@ -## File : 04_predictions.r -## Description : Calculer les synchrones pour chaque groupe obtenu par le -## clustering. - -rm(list = ls()) - -setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") - -## 1. Read auxiliar data files #### - -#identifiants <- read.table("identifs.txt")[ ,1] -dates0 <- read.table("datesall.txt")[, 1] -dates09 <- as.character(dates0[grep("2009", dates0)]) -dates10 <- as.character(dates0[grep("2010", dates0)]) -dates <- c(dates09, dates10) -rm(dates0, dates09, dates10) - -n <- length(identifiants) -p <- length(dates) - -# groups for the predictor -gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") -i_start_calen <- which(rownames(gr_calen) == "2009-01-01") -i_end_calen <- which(rownames(gr_calen) == "2010-12-31") -gr <- gr_calen$gr[i_start_calen:i_end_calen] -gr <- gsub("Thu", "TWT", gr) -gr <- gsub("Wed", "TWT", gr) -gr <- gsub("Tue", "TWT", gr) - -days <- seq.Date(from = as.Date("2009/01/01"), - to = as.Date("2010/12/31"), - by = 'day') - -synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt")) -synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt")) - -## 2. Imputation of missing values #### -# (done by linear interpolation) -nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing -synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4]) - -imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) -imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) - -synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518]) -synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518]) - - -rm(imput09, imput10, nas) -rm(gr_calen, i_start_calen, i_end_calen) - -# Set start and end timepoints for the prediction -i_00 <- which(days == "2009-01-01") -n_00 <- which(days == "2009-12-31") -n_01 <- which(days == "2010-12-31") - - -## 3. Construct predictors #### -library(kerwavfun) -synchros <- (cbind( synchros09, synchros10 )) - -clustfactors <- dir("~/tmp/", pattern = 'clustfactor') -maxK <- length(clustfactors) - 1 - -performK <- data.frame(K = rep(NA_real_, maxK), - mape = rep(NA_real_, maxK), - rmse = rep(NA_real_, maxK)) - -setwd("~/tmp/") -for(cf in seq_along(clustfactors)) { - #clustfactor <- read.table(file = "~/tmp/clustfactor3.txt") - clustfactor <- read.table(file = clustfactors[cf]) - - K <- nrow(unique(clustfactor)) - - performK[cf, 1] <- K - - SC <- matrix(0, ncol = ncol(synchros), nrow = K) - for(k in 1:K){ - clustk <- which(clustfactor == k) - if(length(clustk) > 0) { - if(length(clustk) > 1) { - SCk <- colSums(synchros[which(clustfactor == k), ]) - } else { - SCk <- synchros[which(clustfactor == k), ] - } - SC[k, ] <- SC[k, ] + SCk - rm(SCk) - } - } - - mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) - mat_ks <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE)) - - - ## 2. Transform to wkdata (performs DWT) ## - - wk_Load <- wkdata(X = c(t(mat_Load)), - gr = gr[1:nrow(mat_Load)], - p = 48) - - wks <- lapply(mat_ks, function(mat_k) - wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) - - ## 3. Begin of big loop over ## - grid <- n_00:(n_01 - 1) # these are the reference days - #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) - - # output matrix of predicted days - # names of the predicted days (reference days + 1) - pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) - preds <- lapply(1:K, function(k) pred_Load) - - go <- Sys.time() - for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] - if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n")) - Q <- grid[q] - - select_Load <- select.wkdata(wk_Load, i_00:Q) - aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", - EPS = .Machine$double.eps) - pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X - - selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) - auxs <- lapply(selects, - function(k) wavkerfun(obj = k, - kerneltype = "gaussian", - EPS = .Machine$double.eps)) - for(k in 1:K) - preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X - - } - - top <- Sys.time() - #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) - - # Compute the prediction of synchrone by the sum of individuals - pred_Load_sum <- Reduce('+', preds) - - ## 4. Analisis of the predictions #### - resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 - resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 - - mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) - rmse_Load <- sqrt(rowMeans(resids_Load^2)) - - mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) - rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) - - performK[cf, 2] <- mean(mape_Load, na.rm = TRUE) - performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE) -#print(mean(mape_Load, na.rm = TRUE)) -#print(mean(mape_Load_sum, na.rm = TRUE)) - -} - -performK <- performK[order(performK$K), ] -plot(performK$K, performK$rmse, type = 'l', ylim = c(2,3)) -abline(h = mean(performK$mape), col = 2)