Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : 06_predictions-ICAME.r |
2 | ## Description : Prévoir les synchrones des classes ICAME. | |
3 | ||
4 | ||
5 | rm(list = ls()) | |
6 | ||
7 | if(Sys.info()[4] == "mojarrita"){ | |
8 | setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") | |
9 | } else { | |
10 | setwd("~/2014_EDF-Orsay-Lyon2/codes/") | |
11 | } | |
12 | ||
13 | ## 1. Read auxiliar data files #### | |
14 | #identifiants <- read.table("identifs.txt")[ ,1] | |
15 | dates0 <- read.table("datesall.txt")[, 1] | |
16 | dates09 <- as.character(dates0[grep("2009", dates0)]) | |
17 | dates10 <- as.character(dates0[grep("2010", dates0)]) | |
18 | dates <- c(dates09, dates10) | |
19 | rm(dates0, dates09, dates10) | |
20 | ||
21 | #n <- length(identifiants) | |
22 | p <- length(dates) | |
23 | ||
24 | # groups for the predictor | |
25 | gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") | |
26 | i_start_calen <- which(rownames(gr_calen) == "2009-01-01") | |
27 | i_end_calen <- which(rownames(gr_calen) == "2010-12-31") | |
28 | gr <- gr_calen$gr[i_start_calen:i_end_calen] | |
29 | gr <- gsub("Thu", "TWT", gr) | |
30 | gr <- gsub("Wed", "TWT", gr) | |
31 | gr <- gsub("Tue", "TWT", gr) | |
32 | ||
33 | days <- seq.Date(from = as.Date("2009/01/01"), | |
34 | to = as.Date("2010/12/31"), | |
35 | by = 'day') | |
36 | ||
37 | ||
38 | setwd("../classes ICAME/synchrones_classif_ICAME/") | |
39 | files_ICAME <- dir(pattern = "*TR-32V*") | |
40 | K_ICAME <- length(files_ICAME) | |
41 | SC <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates | |
42 | # (summer time saving) | |
43 | ||
44 | for(k in 1:K_ICAME){ | |
45 | df <- read.table(file = files_ICAME[k], header = TRUE) | |
46 | SC[k, ] <- subset(x = df, # series must be on rows | |
47 | subset = Year == "2009" | Year == "2010", | |
48 | select = "Load")[[1]] | |
49 | } | |
50 | ||
51 | ||
52 | rm(df, files_ICAME, k) | |
53 | rm(gr_calen, i_start_calen, i_end_calen) | |
54 | ||
55 | # Set start and end timepoints for the prediction | |
56 | i_00 <- which(days == "2009-01-01") | |
57 | n_00 <- which(days == "2009-12-31") | |
58 | n_01 <- which(days == "2010-12-31") | |
59 | ||
60 | ||
61 | ## 2. Transform data into wkdata #### | |
62 | library(kerwavfun) | |
63 | ||
64 | mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) | |
65 | mat_ks <- lapply(1:K_ICAME, function(k) matrix(data = SC[k, ], | |
66 | ncol = 48, | |
67 | byrow = TRUE)) | |
68 | ||
69 | wk_Load <- wkdata(X = c(t(mat_Load)), | |
70 | gr = gr[1:nrow(mat_Load)], | |
71 | p = 48) | |
72 | ||
73 | wks <- lapply(mat_ks, function(mat_k) | |
74 | wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) | |
75 | ||
76 | ## 3. Begin of big loop over grid ## | |
77 | grid <- n_00:(n_01 - 1) # these are the reference days | |
78 | #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) | |
79 | ||
80 | # output matrix of predicted days | |
81 | # names of the predicted days (reference days + 1) | |
82 | pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) | |
83 | preds <- lapply(1:K_ICAME, function(k) pred_Load) | |
84 | ||
85 | go <- Sys.time() | |
86 | for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] | |
87 | if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n")) | |
88 | Q <- grid[q] | |
89 | ||
90 | select_Load <- select.wkdata(wk_Load, i_00:Q) | |
91 | aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", | |
92 | EPS = .Machine$double.eps) | |
93 | pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X | |
94 | ||
95 | selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) | |
96 | auxs <- lapply(selects, | |
97 | function(k) wavkerfun(obj = k, | |
98 | kerneltype = "gaussian", | |
99 | EPS = .Machine$double.eps)) | |
100 | for(k in 1:K_ICAME) | |
101 | preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X | |
102 | ||
103 | } | |
104 | top <- Sys.time() | |
105 | #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) | |
106 | ||
107 | ||
108 | preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} ) | |
109 | # Compute the prediction of synchrone by the sum of individuals | |
110 | pred_Load_sum <- Reduce('+', preds) | |
111 | ||
112 | ||
113 | vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd)))) | |
114 | names(vect_preds) <- paste0("icame", 1:K_ICAME) | |
115 | ||
116 | vect_Load <- c(t(pred_Load)) | |
117 | vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load)) | |
118 | ||
119 | ||
120 | for(tt in seq_along(vect_gtop)) { | |
121 | VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs | |
122 | tt < 17511 # these rows present a problem ==> why? (matbe too much 0s in preds_indiv) | |
123 | if(VALID) { | |
124 | res <- gtop(preds_indiv = as.numeric(vect_preds[tt, ]), | |
125 | pred_total = vect_Load[tt], | |
126 | weights_indiv = rep(1, K_ICAME), | |
127 | weight_total = 1, | |
128 | bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ])) | |
129 | vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum | |
130 | } | |
131 | } | |
132 | ||
133 | vect_gtop_sum <- rowSums(vect_gtop) | |
134 | pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE) | |
135 | ||
136 | ## 4. Analisis of the predictions #### | |
137 | resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
138 | resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
139 | resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ] | |
140 | ||
141 | mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) | |
142 | rmse_Load <- sqrt(rowMeans(resids_Load^2)) | |
143 | ||
144 | mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) | |
145 | rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) | |
146 | ||
147 | mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ])) | |
148 | rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2)) | |
149 | ||
150 | ||
151 | mean(mape_Load, na.rm = TRUE) | |
152 | mean(mape_Load_sum, na.rm = TRUE) | |
153 | mean(mape_gtop_sum, na.rm = TRUE) | |
154 | #print(mean(mape_Load, na.rm = TRUE)) | |
155 | #print(mean(mape_Load_sum, na.rm = TRUE)) | |
156 | ||
157 | ||
158 | mean(rmse_Load, na.rm = TRUE) | |
159 | mean(rmse_Load_sum, na.rm = TRUE) | |
160 | mean(rmse_gtop_sum, na.rm = TRUE) |