complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 06_predictions-ICAME.r
1 ## File : 06_predictions-ICAME.r
2 ## Description : Prévoir les synchrones des classes ICAME.
3
4
5 rm(list = ls())
6
7 if(Sys.info()[4] == "mojarrita"){
8 setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
9 } else {
10 setwd("~/2014_EDF-Orsay-Lyon2/codes/")
11 }
12
13 ## 1. Read auxiliar data files ####
14 #identifiants <- read.table("identifs.txt")[ ,1]
15 dates0 <- read.table("datesall.txt")[, 1]
16 dates09 <- as.character(dates0[grep("2009", dates0)])
17 dates10 <- as.character(dates0[grep("2010", dates0)])
18 dates <- c(dates09, dates10)
19 rm(dates0, dates09, dates10)
20
21 #n <- length(identifiants)
22 p <- length(dates)
23
24 # groups for the predictor
25 gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
26 i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
27 i_end_calen <- which(rownames(gr_calen) == "2010-12-31")
28 gr <- gr_calen$gr[i_start_calen:i_end_calen]
29 gr <- gsub("Thu", "TWT", gr)
30 gr <- gsub("Wed", "TWT", gr)
31 gr <- gsub("Tue", "TWT", gr)
32
33 days <- seq.Date(from = as.Date("2009/01/01"),
34 to = as.Date("2010/12/31"),
35 by = 'day')
36
37
38 setwd("../classes ICAME/synchrones_classif_ICAME/")
39 files_ICAME <- dir(pattern = "*TR-32V*")
40 K_ICAME <- length(files_ICAME)
41 SC <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates
42 # (summer time saving)
43
44 for(k in 1:K_ICAME){
45 df <- read.table(file = files_ICAME[k], header = TRUE)
46 SC[k, ] <- subset(x = df, # series must be on rows
47 subset = Year == "2009" | Year == "2010",
48 select = "Load")[[1]]
49 }
50
51
52 rm(df, files_ICAME, k)
53 rm(gr_calen, i_start_calen, i_end_calen)
54
55 # Set start and end timepoints for the prediction
56 i_00 <- which(days == "2009-01-01")
57 n_00 <- which(days == "2009-12-31")
58 n_01 <- which(days == "2010-12-31")
59
60
61 ## 2. Transform data into wkdata ####
62 library(kerwavfun)
63
64 mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE)
65 mat_ks <- lapply(1:K_ICAME, function(k) matrix(data = SC[k, ],
66 ncol = 48,
67 byrow = TRUE))
68
69 wk_Load <- wkdata(X = c(t(mat_Load)),
70 gr = gr[1:nrow(mat_Load)],
71 p = 48)
72
73 wks <- lapply(mat_ks, function(mat_k)
74 wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
75
76 ## 3. Begin of big loop over grid ##
77 grid <- n_00:(n_01 - 1) # these are the reference days
78 #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
79
80 # output matrix of predicted days
81 # names of the predicted days (reference days + 1)
82 pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
83 preds <- lapply(1:K_ICAME, function(k) pred_Load)
84
85 go <- Sys.time()
86 for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
87 if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n"))
88 Q <- grid[q]
89
90 select_Load <- select.wkdata(wk_Load, i_00:Q)
91 aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
92 EPS = .Machine$double.eps)
93 pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
94
95 selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
96 auxs <- lapply(selects,
97 function(k) wavkerfun(obj = k,
98 kerneltype = "gaussian",
99 EPS = .Machine$double.eps))
100 for(k in 1:K_ICAME)
101 preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
102
103 }
104 top <- Sys.time()
105 #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
106
107
108 preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} )
109 # Compute the prediction of synchrone by the sum of individuals
110 pred_Load_sum <- Reduce('+', preds)
111
112
113 vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd))))
114 names(vect_preds) <- paste0("icame", 1:K_ICAME)
115
116 vect_Load <- c(t(pred_Load))
117 vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load))
118
119
120 for(tt in seq_along(vect_gtop)) {
121 VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs
122 tt < 17511 # these rows present a problem ==> why? (matbe too much 0s in preds_indiv)
123 if(VALID) {
124 res <- gtop(preds_indiv = as.numeric(vect_preds[tt, ]),
125 pred_total = vect_Load[tt],
126 weights_indiv = rep(1, K_ICAME),
127 weight_total = 1,
128 bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ]))
129 vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum
130 }
131 }
132
133 vect_gtop_sum <- rowSums(vect_gtop)
134 pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE)
135
136 ## 4. Analisis of the predictions ####
137 resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1
138 resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
139 resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ]
140
141 mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
142 rmse_Load <- sqrt(rowMeans(resids_Load^2))
143
144 mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
145 rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
146
147 mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ]))
148 rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2))
149
150
151 mean(mape_Load, na.rm = TRUE)
152 mean(mape_Load_sum, na.rm = TRUE)
153 mean(mape_gtop_sum, na.rm = TRUE)
154 #print(mean(mape_Load, na.rm = TRUE))
155 #print(mean(mape_Load_sum, na.rm = TRUE))
156
157
158 mean(rmse_Load, na.rm = TRUE)
159 mean(rmse_Load_sum, na.rm = TRUE)
160 mean(rmse_gtop_sum, na.rm = TRUE)