Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : 06_predictions.r |
2 | ## Description : Calculer les synchrones pour chaque groupe | |
3 | ## obtenu par le clustering. | |
4 | ||
5 | rm(list = ls()) | |
6 | ||
7 | setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/") | |
8 | ||
9 | ||
10 | ## 1. Read auxiliar data files #### | |
11 | ||
12 | identifiants <- read.table("identifs.txt")[ ,1] | |
13 | dates0 <- read.table("datesall.txt")[, 1] | |
14 | dates09 <- as.character(dates0[grep("2009", dates0)]) | |
15 | dates10 <- as.character(dates0[grep("2010", dates0)]) | |
16 | dates <- c(dates09, dates10) | |
17 | rm(dates0, dates09, dates10) | |
18 | ||
19 | n <- length(identifiants) | |
20 | p <- length(dates) | |
21 | ||
22 | # groups for the predictor | |
23 | gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") | |
24 | i_start_calen <- which(rownames(gr_calen) == "2009-01-01") | |
25 | i_end_calen <- which(rownames(gr_calen) == "2010-12-31") | |
26 | gr <- gr_calen$gr[i_start_calen:i_end_calen] | |
27 | gr <- gsub("Thu", "TWT", gr) | |
28 | gr <- gsub("Wed", "TWT", gr) | |
29 | gr <- gsub("Tue", "TWT", gr) | |
30 | ||
31 | days <- seq.Date(from = as.Date("2009/01/01"), | |
32 | to = as.Date("2010/12/31"), | |
33 | by = 'day') | |
34 | ||
35 | #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt")) | |
36 | #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt")) | |
37 | #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt")) | |
38 | #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200RC.txt")) | |
39 | synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200-random.txt")) | |
40 | synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200-random.txt")) | |
41 | ||
42 | ||
43 | synchros09 <- t(synchros09) # series must be on rows | |
44 | synchros10 <- t(synchros10) # series must be on rows | |
45 | ||
46 | ## 2. Imputation of missing values #### | |
47 | # (done by linear interpolation) | |
48 | nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing | |
49 | synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4]) | |
50 | ||
51 | imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) | |
52 | imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) | |
53 | ||
54 | synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518]) | |
55 | synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518]) | |
56 | ||
57 | ||
58 | rm(imput09, imput10, nas) | |
59 | rm(gr_calen, i_start_calen, i_end_calen) | |
60 | ||
61 | # Set start and end timepoints for the prediction | |
62 | i_00 <- which(days == "2009-01-01") | |
63 | n_00 <- which(days == "2009-12-31") | |
64 | n_01 <- which(days == "2010-12-31") | |
65 | ||
66 | ||
67 | ## 3. Construct predictors #### | |
68 | library(kerwavfun) | |
69 | synchros <- (cbind( synchros09, synchros10 ))[-201, ] | |
70 | ||
71 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorAC') | |
72 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRC') | |
73 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorWER') | |
74 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRANDOM') | |
75 | clustfactors <- dir("~/tmp/", pattern = 'clustfactor-randomWER') | |
76 | ||
77 | maxK <- length(clustfactors) - 1 | |
78 | ||
79 | performK <- data.frame(K = rep(NA_real_, maxK), | |
80 | mape = rep(NA_real_, maxK), | |
81 | rmse = rep(NA_real_, maxK)) | |
82 | ||
83 | setwd("~/tmp/") | |
84 | for(cf in seq_along(clustfactors)) { | |
85 | # clustfactor <- read.table(file = "~/tmp/clustfactor3.txt") | |
86 | clustfactor <- read.table(file = clustfactors[cf]) | |
87 | ||
88 | K <- nrow(unique(clustfactor)) | |
89 | ||
90 | performK[cf, 1] <- K | |
91 | ||
92 | SC <- matrix(0, ncol = ncol(synchros), nrow = K) | |
93 | for(k in 1:K){ | |
94 | clustk <- which(clustfactor == k) | |
95 | if(length(clustk) > 0) { | |
96 | if(length(clustk) > 1) { | |
97 | SCk <- colSums(synchros[which(clustfactor == k), ]) | |
98 | } else { | |
99 | SCk <- synchros[which(clustfactor == k), ] | |
100 | } | |
101 | SC[k, ] <- SC[k, ] + SCk | |
102 | rm(SCk) | |
103 | } | |
104 | } | |
105 | ||
106 | mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) | |
107 | mat_ks <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE)) | |
108 | ||
109 | ||
110 | ## 2. Transform to wkdata (performs DWT) ## | |
111 | ||
112 | wk_Load <- wkdata(X = c(t(mat_Load)), | |
113 | gr = gr[1:nrow(mat_Load)], | |
114 | p = 48) | |
115 | ||
116 | wks <- lapply(mat_ks, function(mat_k) | |
117 | wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) | |
118 | ||
119 | ## 3. Begin of big loop over ## | |
120 | grid <- n_00:(n_01 - 1) # these are the reference days | |
121 | #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) | |
122 | ||
123 | # output matrix of predicted days | |
124 | # names of the predicted days (reference days + 1) | |
125 | pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) | |
126 | preds <- lapply(1:K, function(k) pred_Load) | |
127 | ||
128 | go <- Sys.time() | |
129 | for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] | |
130 | if((q %/% 50) == (q / 50)) cat(paste(" iteration: ", q, "\n")) | |
131 | Q <- grid[q] | |
132 | ||
133 | select_Load <- select.wkdata(wk_Load, i_00:Q) | |
134 | aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", | |
135 | EPS = .Machine$double.eps) | |
136 | pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X | |
137 | ||
138 | selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) | |
139 | auxs <- lapply(selects, | |
140 | function(k) wavkerfun(obj = k, | |
141 | kerneltype = "gaussian", | |
142 | EPS = .Machine$double.eps)) | |
143 | for(k in 1:K) | |
144 | preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X | |
145 | ||
146 | } | |
147 | ||
148 | top <- Sys.time() | |
149 | #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) | |
150 | ||
151 | # Compute the prediction of synchrone by the sum of individuals | |
152 | pred_Load_sum <- Reduce('+', preds) | |
153 | ||
154 | ## 4. Analisis of the predictions #### | |
155 | resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
156 | resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
157 | ||
158 | mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) | |
159 | rmse_Load <- sqrt(rowMeans(resids_Load^2)) | |
160 | ||
161 | mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) | |
162 | rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) | |
163 | ||
164 | performK[cf, 2] <- mean(mape_Load, na.rm = TRUE) | |
165 | performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE) | |
166 | #print(mean(mape_Load, na.rm = TRUE)) | |
167 | #print(mean(mape_Load_sum, na.rm = TRUE)) | |
168 | ||
169 | } | |
170 | ||
171 | ||
172 | #performKrc <- read.table("../res/perform200RC.txt") | |
173 | #write.table(file = '~/ownCloud/projects/2014_EDF-Orsay-Lyon2/res/perform200-random.txt', | |
174 | # performK) | |
175 | #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt', | |
176 | # performK) | |
177 | #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200WER_3.txt', | |
178 | # performK) | |
179 | ||
180 | #performK <- data.frame(K = performKrc$K, | |
181 | # rc = performKrc$rmse, | |
182 | # wer = performKwer$rmse) | |
183 | ||
184 | #perform <- rbind(performK, performKwer) | |
185 | #perform <- perform[order(perform$K), ] | |
186 | ||
187 | plot(perform$K, perform$rmse, type = 'l', ylim = c(1.7,2.2), | |
188 | lwd = 2, col = 2, xlab = "Nb Cluster", ylab = "MAPE (en %)") | |
189 | lines(performK$K, performKrc$rmse, col = 3, lwd = 2) | |
190 | abline(h = mean(performK$mape), lwd = 2) | |
191 | legend("topright", c('AGG', 'DIS euclid', "DIS WER"), lwd = 2, col = 1:3) | |
192 | ||
193 | ||
194 | ## Plot article ENERGYCON | |
195 | performKwer <- read.table("../res/perform200WER.txt") | |
196 | performKwer <- performKwer[order(performKwer$K), ] | |
197 | ||
198 | pdf("~/perf_wer.pdf") | |
199 | plot(performKwer$K, performKwer$rmse, type = 'l', | |
200 | ylim = c(1.7,2.2), lwd = 2, col = 2, | |
201 | xlab = "Nb Cluster", ylab = "MAPE (en %)") | |
202 | abline(h = mean(performKwer$mape), lwd = 2) | |
203 | legend("topright", c('AGG', "DIS WER"), lwd = 2, col = 1:2) | |
204 | dev.off() |