complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / 06_predictions.r
CommitLineData
ad642dc6
BA
1## File : 06_predictions.r
2## Description : Calculer les synchrones pour chaque groupe
3## obtenu par le clustering.
4
5rm(list = ls())
6
7setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
8
9
10## 1. Read auxiliar data files ####
11
12identifiants <- read.table("identifs.txt")[ ,1]
13dates0 <- read.table("datesall.txt")[, 1]
14dates09 <- as.character(dates0[grep("2009", dates0)])
15dates10 <- as.character(dates0[grep("2010", dates0)])
16dates <- c(dates09, dates10)
17rm(dates0, dates09, dates10)
18
19n <- length(identifiants)
20p <- length(dates)
21
22# groups for the predictor
23gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
24i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
25i_end_calen <- which(rownames(gr_calen) == "2010-12-31")
26gr <- gr_calen$gr[i_start_calen:i_end_calen]
27gr <- gsub("Thu", "TWT", gr)
28gr <- gsub("Wed", "TWT", gr)
29gr <- gsub("Tue", "TWT", gr)
30
31days <- seq.Date(from = as.Date("2009/01/01"),
32 to = as.Date("2010/12/31"),
33 by = 'day')
34
35#synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt"))
36#synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt"))
37#synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt"))
38#synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200RC.txt"))
39synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200-random.txt"))
40synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200-random.txt"))
41
42
43synchros09 <- t(synchros09) # series must be on rows
44synchros10 <- t(synchros10) # series must be on rows
45
46## 2. Imputation of missing values ####
47# (done by linear interpolation)
48nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing
49synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4])
50
51imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
52imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
53
54synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518])
55synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518])
56
57
58rm(imput09, imput10, nas)
59rm(gr_calen, i_start_calen, i_end_calen)
60
61# Set start and end timepoints for the prediction
62i_00 <- which(days == "2009-01-01")
63n_00 <- which(days == "2009-12-31")
64n_01 <- which(days == "2010-12-31")
65
66
67## 3. Construct predictors ####
68library(kerwavfun)
69synchros <- (cbind( synchros09, synchros10 ))[-201, ]
70
71#clustfactors <- dir("~/tmp/", pattern = 'clustfactorAC')
72#clustfactors <- dir("~/tmp/", pattern = 'clustfactorRC')
73#clustfactors <- dir("~/tmp/", pattern = 'clustfactorWER')
74#clustfactors <- dir("~/tmp/", pattern = 'clustfactorRANDOM')
75clustfactors <- dir("~/tmp/", pattern = 'clustfactor-randomWER')
76
77maxK <- length(clustfactors) - 1
78
79performK <- data.frame(K = rep(NA_real_, maxK),
80 mape = rep(NA_real_, maxK),
81 rmse = rep(NA_real_, maxK))
82
83setwd("~/tmp/")
84for(cf in seq_along(clustfactors)) {
85# clustfactor <- read.table(file = "~/tmp/clustfactor3.txt")
86 clustfactor <- read.table(file = clustfactors[cf])
87
88 K <- nrow(unique(clustfactor))
89
90 performK[cf, 1] <- K
91
92 SC <- matrix(0, ncol = ncol(synchros), nrow = K)
93 for(k in 1:K){
94 clustk <- which(clustfactor == k)
95 if(length(clustk) > 0) {
96 if(length(clustk) > 1) {
97 SCk <- colSums(synchros[which(clustfactor == k), ])
98 } else {
99 SCk <- synchros[which(clustfactor == k), ]
100 }
101 SC[k, ] <- SC[k, ] + SCk
102 rm(SCk)
103 }
104 }
105
106 mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE)
107 mat_ks <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE))
108
109
110 ## 2. Transform to wkdata (performs DWT) ##
111
112 wk_Load <- wkdata(X = c(t(mat_Load)),
113 gr = gr[1:nrow(mat_Load)],
114 p = 48)
115
116 wks <- lapply(mat_ks, function(mat_k)
117 wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
118
119 ## 3. Begin of big loop over ##
120 grid <- n_00:(n_01 - 1) # these are the reference days
121 #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
122
123 # output matrix of predicted days
124 # names of the predicted days (reference days + 1)
125 pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
126 preds <- lapply(1:K, function(k) pred_Load)
127
128 go <- Sys.time()
129 for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
130 if((q %/% 50) == (q / 50)) cat(paste(" iteration: ", q, "\n"))
131 Q <- grid[q]
132
133 select_Load <- select.wkdata(wk_Load, i_00:Q)
134 aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
135 EPS = .Machine$double.eps)
136 pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
137
138 selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
139 auxs <- lapply(selects,
140 function(k) wavkerfun(obj = k,
141 kerneltype = "gaussian",
142 EPS = .Machine$double.eps))
143 for(k in 1:K)
144 preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
145
146 }
147
148 top <- Sys.time()
149 #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
150
151 # Compute the prediction of synchrone by the sum of individuals
152 pred_Load_sum <- Reduce('+', preds)
153
154 ## 4. Analisis of the predictions ####
155 resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1
156 resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
157
158 mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
159 rmse_Load <- sqrt(rowMeans(resids_Load^2))
160
161 mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
162 rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
163
164 performK[cf, 2] <- mean(mape_Load, na.rm = TRUE)
165 performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE)
166#print(mean(mape_Load, na.rm = TRUE))
167#print(mean(mape_Load_sum, na.rm = TRUE))
168
169}
170
171
172#performKrc <- read.table("../res/perform200RC.txt")
173#write.table(file = '~/ownCloud/projects/2014_EDF-Orsay-Lyon2/res/perform200-random.txt',
174# performK)
175#write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt',
176# performK)
177#write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200WER_3.txt',
178# performK)
179
180#performK <- data.frame(K = performKrc$K,
181# rc = performKrc$rmse,
182# wer = performKwer$rmse)
183
184#perform <- rbind(performK, performKwer)
185#perform <- perform[order(perform$K), ]
186
187plot(perform$K, perform$rmse, type = 'l', ylim = c(1.7,2.2),
188 lwd = 2, col = 2, xlab = "Nb Cluster", ylab = "MAPE (en %)")
189lines(performK$K, performKrc$rmse, col = 3, lwd = 2)
190abline(h = mean(performK$mape), lwd = 2)
191legend("topright", c('AGG', 'DIS euclid', "DIS WER"), lwd = 2, col = 1:3)
192
193
194## Plot article ENERGYCON
195performKwer <- read.table("../res/perform200WER.txt")
196performKwer <- performKwer[order(performKwer$K), ]
197
198pdf("~/perf_wer.pdf")
199plot(performKwer$K, performKwer$rmse, type = 'l',
200 ylim = c(1.7,2.2), lwd = 2, col = 2,
201 xlab = "Nb Cluster", ylab = "MAPE (en %)")
202abline(h = mean(performKwer$mape), lwd = 2)
203legend("topright", c('AGG', "DIS WER"), lwd = 2, col = 1:2)
204dev.off()