Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : 06_predictions.r |
2 | ## Description : Calculer les synchrones pour chaque groupe | |
3 | ## obtenu par le clustering. | |
4 | ||
5 | rm(list = ls()) | |
6 | ||
7 | if(Sys.info()[4] == "mojarrita"){ | |
8 | setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") | |
9 | } else { | |
10 | setwd("~/2014_EDF-Orsay-Lyon2/codes/") | |
11 | } | |
12 | ||
13 | ## 1. Read auxiliar data files #### | |
14 | ||
15 | identifiants <- read.table("identifs.txt")[ ,1] | |
16 | dates0 <- read.table("datesall.txt")[, 1] | |
17 | dates09 <- as.character(dates0[grep("2009", dates0)]) | |
18 | dates10 <- as.character(dates0[grep("2010", dates0)]) | |
19 | dates <- c(dates09, dates10) | |
20 | rm(dates0, dates09, dates10) | |
21 | ||
22 | n <- length(identifiants) | |
23 | p <- length(dates) | |
24 | ||
25 | # groups for the predictor | |
26 | gr_calen <- read.table("calendar_transition_groups-1996-2011.txt") | |
27 | i_start_calen <- which(rownames(gr_calen) == "2009-01-01") | |
28 | i_end_calen <- which(rownames(gr_calen) == "2010-12-31") | |
29 | gr <- gr_calen$gr[i_start_calen:i_end_calen] | |
30 | gr <- gsub("Thu", "TWT", gr) | |
31 | gr <- gsub("Wed", "TWT", gr) | |
32 | gr <- gsub("Tue", "TWT", gr) | |
33 | ||
34 | days <- seq.Date(from = as.Date("2009/01/01"), | |
35 | to = as.Date("2010/12/31"), | |
36 | by = 'day') | |
37 | ||
38 | #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt")) | |
39 | #synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200.txt")) | |
40 | synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt")) | |
41 | synchros10 <- as.matrix(read.table("~/tmp/2010_synchros200RC.txt")) | |
42 | ||
43 | synchros09 <- t(synchros09) # series must be on rows | |
44 | synchros10 <- t(synchros10) # series must be on rows | |
45 | ||
46 | ## 2. Imputation of missing values #### | |
47 | # (done by linear interpolation) | |
48 | nas <- which(is.na(synchros09)[, 1]) # some 1/1/2009 are missing | |
49 | synchros09[nas, 1] <- rowMeans(synchros09[nas, 2:4]) | |
50 | ||
51 | imput09 <- synchros09[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) | |
52 | imput10 <- synchros10[, 4132:4133] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2) | |
53 | ||
54 | synchros09 <- cbind(synchros09[, 1:4180], imput09, synchros09[, 4181:17518]) | |
55 | synchros10 <- cbind(synchros10[, 1:4132], imput10, synchros10[, 4133:17518]) | |
56 | ||
57 | ||
58 | rm(imput09, imput10, nas) | |
59 | rm(gr_calen, i_start_calen, i_end_calen) | |
60 | ||
61 | # Set start and end timepoints for the prediction | |
62 | i_00 <- which(days == "2009-01-01") | |
63 | n_00 <- which(days == "2009-12-31") | |
64 | n_01 <- which(days == "2010-12-31") | |
65 | ||
66 | ||
67 | ## 3. Construct predictors #### | |
68 | library(kerwavfun) | |
69 | synchros <- (cbind( synchros09, synchros10 ))[-201, ] | |
70 | ||
71 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorAC') | |
72 | #clustfactors <- dir("~/tmp/", pattern = 'clustfactorRC') | |
73 | clustfactors <- dir("~/tmp/", pattern = 'clustfactorWER') | |
74 | ||
75 | maxK <- length(clustfactors) - 1 | |
76 | ||
77 | performK <- data.frame(K = rep(NA_real_, maxK), | |
78 | mape = rep(NA_real_, maxK), | |
79 | rmse = rep(NA_real_, maxK)) | |
80 | ||
81 | setwd("~/tmp/") | |
82 | for(cf in seq_along(clustfactors)) { | |
83 | #clustfactor <- read.table(file = "~/tmp/clustfactor3.txt") | |
84 | clustfactor <- read.table(file = clustfactors[cf]) | |
85 | ||
86 | K <- nrow(unique(clustfactor)) | |
87 | ||
88 | performK[cf, 1] <- K | |
89 | ||
90 | SC <- matrix(0, ncol = ncol(synchros), nrow = K) | |
91 | for(k in 1:K){ | |
92 | clustk <- which(clustfactor == k) | |
93 | if(length(clustk) > 0) { | |
94 | if(length(clustk) > 1) { | |
95 | SCk <- colSums(synchros[which(clustfactor == k), ]) | |
96 | } else { | |
97 | SCk <- synchros[which(clustfactor == k), ] | |
98 | } | |
99 | SC[k, ] <- SC[k, ] + SCk | |
100 | rm(SCk) | |
101 | } | |
102 | } | |
103 | ||
104 | mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE) | |
105 | mat_ks <- lapply(1:K, function(k) matrix(SC[k, ], ncol = 48, byrow = TRUE)) | |
106 | ||
107 | ||
108 | ## 2. Transform to wkdata (performs DWT) ## | |
109 | ||
110 | wk_Load <- wkdata(X = c(t(mat_Load)), | |
111 | gr = gr[1:nrow(mat_Load)], | |
112 | p = 48) | |
113 | ||
114 | wks <- lapply(mat_ks, function(mat_k) | |
115 | wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48)) | |
116 | ||
117 | ## 3. Begin of big loop over ## | |
118 | grid <- n_00:(n_01 - 1) # these are the reference days | |
119 | #h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid)) | |
120 | ||
121 | # output matrix of predicted days | |
122 | # names of the predicted days (reference days + 1) | |
123 | pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid)) | |
124 | preds <- lapply(1:K, function(k) pred_Load) | |
125 | ||
126 | go <- Sys.time() | |
127 | for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)] | |
128 | if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n")) | |
129 | Q <- grid[q] | |
130 | ||
131 | select_Load <- select.wkdata(wk_Load, i_00:Q) | |
132 | aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian", | |
133 | EPS = .Machine$double.eps) | |
134 | pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X | |
135 | ||
136 | selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q)) | |
137 | auxs <- lapply(selects, | |
138 | function(k) wavkerfun(obj = k, | |
139 | kerneltype = "gaussian", | |
140 | EPS = .Machine$double.eps)) | |
141 | for(k in 1:K) | |
142 | preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X | |
143 | ||
144 | } | |
145 | ||
146 | top <- Sys.time() | |
147 | #rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load) | |
148 | ||
149 | # Compute the prediction of synchrone by the sum of individuals | |
150 | pred_Load_sum <- Reduce('+', preds) | |
151 | ||
152 | ## 4. Analisis of the predictions #### | |
153 | resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
154 | resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1 | |
155 | ||
156 | mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ])) | |
157 | rmse_Load <- sqrt(rowMeans(resids_Load^2)) | |
158 | ||
159 | mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ])) | |
160 | rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2)) | |
161 | ||
162 | performK[cf, 2] <- mean(mape_Load, na.rm = TRUE) | |
163 | performK[cf, 3] <- mean(mape_Load_sum, na.rm = TRUE) | |
164 | #print(mean(mape_Load, na.rm = TRUE)) | |
165 | #print(mean(mape_Load_sum, na.rm = TRUE)) | |
166 | ||
167 | } | |
168 | ||
169 | ||
170 | performKwer <- performK | |
171 | ||
172 | #performKrc <- read.table("~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt") | |
173 | #write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200RC.txt', | |
174 | # performK) | |
175 | write.table(file = '~/Documents/projects/2014_EDF-Orsay-Lyon2/res/perform200WER_3.txt', | |
176 | performK) | |
177 | ||
178 | #performK <- data.frame(K = performKrc$K, | |
179 | # rc = performKrc$rmse, | |
180 | # wer = performKwer$rmse) | |
181 | ||
182 | ||
183 | performK <- performK[order(performK$K), ] | |
184 | ||
185 | ||
186 | #plot(performK$K, performK$rc, type = 'l', ylim = c(1.7,2.2), | |
187 | # lwd = 2, col = 2) | |
188 | #lines(performK$K, performK$wer, col = 3, lwd = 2) | |
189 | #abline(h = mean(performKrc$mape), lwd = 2) | |
190 | #legend("topright", c('Sync 32K', 'Cl. RC', "Cl. WER"), lwd = 2, col = 1:3) |