[epclust.git] / old_C_code / stage2 / src / unused / 06_predictions-ICAME.r

## File : 06_predictions-ICAME.r
## Description : Prévoir les synchrones des classes ICAME. 


rm(list = ls())

if(Sys.info()[4] ==  "mojarrita"){ 
  setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
} else {
  setwd("~/2014_EDF-Orsay-Lyon2/codes/")
}

## 1. Read auxiliar data files ####
#identifiants <- read.table("identifs.txt")[ ,1]
dates0       <- read.table("datesall.txt")[, 1]
dates09      <- as.character(dates0[grep("2009", dates0)])
dates10      <- as.character(dates0[grep("2010", dates0)])
dates        <- c(dates09, dates10)
rm(dates0, dates09, dates10)

#n <- length(identifiants)
p <- length(dates)

# groups for the predictor
gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
i_end_calen   <- which(rownames(gr_calen) == "2010-12-31")
gr <- gr_calen$gr[i_start_calen:i_end_calen]
gr <- gsub("Thu", "TWT", gr)
gr <- gsub("Wed", "TWT", gr)
gr <- gsub("Tue", "TWT", gr)

days <- seq.Date(from = as.Date("2009/01/01"),
                 to   = as.Date("2010/12/31"),
                 by   = 'day')


setwd("../classes ICAME/synchrones_classif_ICAME/")
files_ICAME <- dir(pattern = "*TR-32V*")
K_ICAME  <- length(files_ICAME)
SC       <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates 
                                                           # (summer time saving)

for(k in 1:K_ICAME){ 
  df <- read.table(file = files_ICAME[k], header = TRUE)
  SC[k, ] <- subset(x      = df,                    # series must be on rows
                    subset = Year == "2009" | Year == "2010", 
                    select = "Load")[[1]]
}


rm(df, files_ICAME, k)
rm(gr_calen, i_start_calen, i_end_calen)

# Set start and end timepoints for the prediction
i_00      <- which(days == "2009-01-01")
n_00      <- which(days == "2009-12-31") 
n_01      <- which(days == "2010-12-31")


## 2. Transform data into wkdata ####
library(kerwavfun)

mat_Load <- matrix(colSums(SC),   ncol = 48, byrow = TRUE)
mat_ks   <- lapply(1:K_ICAME, function(k) matrix(data  = SC[k, ], 
                                                 ncol  = 48, 
                                                 byrow = TRUE))
  
wk_Load <- wkdata(X  = c(t(mat_Load)), 
                  gr = gr[1:nrow(mat_Load)], 
                  p  = 48)
  
wks <- lapply(mat_ks, function(mat_k) 
       wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
  
## 3. Begin of big loop over grid ##
grid <- n_00:(n_01 - 1) # these are the reference days
#h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
  
# output matrix of predicted days 
# names of the predicted days (reference days + 1)
pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
preds     <- lapply(1:K_ICAME, function(k) pred_Load)
  
go <- Sys.time()
for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
  if((q %/% 10) == (q / 10)) cat(paste("  iteration: ", q, "\n"))
  Q  <- grid[q]
    
  select_Load <- select.wkdata(wk_Load, i_00:Q)
  aux_Load  <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
                         EPS = .Machine$double.eps) 
  pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
    
  selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
  auxs    <- lapply(selects, 
                    function(k) wavkerfun(obj = k, 
                                          kerneltype = "gaussian",
                                          EPS = .Machine$double.eps))  
  for(k in 1:K_ICAME) 
    preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
    
}
top <- Sys.time()
#rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
  

preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} )
# Compute the prediction of synchrone by the sum of individuals
pred_Load_sum <- Reduce('+', preds) 
  

vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd))))
names(vect_preds) <- paste0("icame", 1:K_ICAME)

vect_Load <- c(t(pred_Load))
vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load))


for(tt in seq_along(vect_gtop)) {
  VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs
           tt < 17511  # these rows present a problem ==> why? (matbe too much 0s in preds_indiv)
    if(VALID) { 
    res <- gtop(preds_indiv   = as.numeric(vect_preds[tt, ]),
                pred_total    = vect_Load[tt],
                weights_indiv = rep(1, K_ICAME),
                weight_total = 1,
                bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ]))
    vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum
    }
  }

vect_gtop_sum <- rowSums(vect_gtop)
pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE)

## 4. Analisis of the predictions  ####
resids_Load     <- pred_Load     - mat_Load[grid + 1, ] # i.e. reference days + 1
resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ]

mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
rmse_Load <- sqrt(rowMeans(resids_Load^2))
  
mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))

mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ]))
rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2))


mean(mape_Load, na.rm = TRUE)
mean(mape_Load_sum, na.rm = TRUE)
mean(mape_gtop_sum, na.rm = TRUE)
#print(mean(mape_Load, na.rm = TRUE))
#print(mean(mape_Load_sum, na.rm = TRUE))
  

mean(rmse_Load, na.rm = TRUE)
mean(rmse_Load_sum, na.rm = TRUE)
mean(rmse_gtop_sum, na.rm = TRUE)
Commit	Line	Data
	1	## File : 06_predictions-ICAME.r
	2	## Description : Prévoir les synchrones des classes ICAME.
	3
	4
	5	rm(list = ls())
	6
	7	if(Sys.info()[4] == "mojarrita"){
	8	setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
	9	} else {
	10	setwd("~/2014_EDF-Orsay-Lyon2/codes/")
	11	}
	12
	13	## 1. Read auxiliar data files ####
	14	#identifiants <- read.table("identifs.txt")[ ,1]
	15	dates0 <- read.table("datesall.txt")[, 1]
	16	dates09 <- as.character(dates0[grep("2009", dates0)])
	17	dates10 <- as.character(dates0[grep("2010", dates0)])
	18	dates <- c(dates09, dates10)
	19	rm(dates0, dates09, dates10)
	20
	21	#n <- length(identifiants)
	22	p <- length(dates)
	23
	24	# groups for the predictor
	25	gr_calen <- read.table("calendar_transition_groups-1996-2011.txt")
	26	i_start_calen <- which(rownames(gr_calen) == "2009-01-01")
	27	i_end_calen <- which(rownames(gr_calen) == "2010-12-31")
	28	gr <- gr_calen$gr[i_start_calen:i_end_calen]
	29	gr <- gsub("Thu", "TWT", gr)
	30	gr <- gsub("Wed", "TWT", gr)
	31	gr <- gsub("Tue", "TWT", gr)
	32
	33	days <- seq.Date(from = as.Date("2009/01/01"),
	34	to = as.Date("2010/12/31"),
	35	by = 'day')
	36
	37
	38	setwd("../classes ICAME/synchrones_classif_ICAME/")
	39	files_ICAME <- dir(pattern = "TR-32V")
	40	K_ICAME <- length(files_ICAME)
	41	SC <- matrix(NA_real_, nrow = K_ICAME, ncol = p + 4) # NA missing in dates
	42	# (summer time saving)
	43
	44	for(k in 1:K_ICAME){
	45	df <- read.table(file = files_ICAME[k], header = TRUE)
	46	SC[k, ] <- subset(x = df, # series must be on rows
	47	subset = Year == "2009" \| Year == "2010",
	48	select = "Load")[[1]]
	49	}
	50
	51
	52	rm(df, files_ICAME, k)
	53	rm(gr_calen, i_start_calen, i_end_calen)
	54
	55	# Set start and end timepoints for the prediction
	56	i_00 <- which(days == "2009-01-01")
	57	n_00 <- which(days == "2009-12-31")
	58	n_01 <- which(days == "2010-12-31")
	59
	60
	61	## 2. Transform data into wkdata ####
	62	library(kerwavfun)
	63
	64	mat_Load <- matrix(colSums(SC), ncol = 48, byrow = TRUE)
	65	mat_ks <- lapply(1:K_ICAME, function(k) matrix(data = SC[k, ],
	66	ncol = 48,
	67	byrow = TRUE))
	68
	69	wk_Load <- wkdata(X = c(t(mat_Load)),
	70	gr = gr[1:nrow(mat_Load)],
	71	p = 48)
	72
	73	wks <- lapply(mat_ks, function(mat_k)
	74	wkdata(X = c(t(mat_k)), gr = gr[1:nrow(mat_Load)], p = 48))
	75
	76	## 3. Begin of big loop over grid ##
	77	grid <- n_00:(n_01 - 1) # these are the reference days
	78	#h_Load <- h_k1 <- h_k2 <- h_k3 <- double(length(grid))
	79
	80	# output matrix of predicted days
	81	# names of the predicted days (reference days + 1)
	82	pred_Load <- matrix(NA_real_, ncol = 48, nrow = length(grid))
	83	preds <- lapply(1:K_ICAME, function(k) pred_Load)
	84
	85	go <- Sys.time()
	86	for(q in seq_along(grid)[-c(121, 122, 227, 228)]) { #seq_along(grid)[-c(46:47)]
	87	if((q %/% 10) == (q / 10)) cat(paste(" iteration: ", q, "\n"))
	88	Q <- grid[q]
	89
	90	select_Load <- select.wkdata(wk_Load, i_00:Q)
	91	aux_Load <- wavkerfun(obj = select_Load, kerneltype = "gaussian",
	92	EPS = .Machine$double.eps)
	93	pred_Load[q, ] <- predict.wavkerfun(obj = aux_Load, cent = "DIFF")$X
	94
	95	selects <- lapply(wks, function(k) select.wkdata(k, i_00:Q))
	96	auxs <- lapply(selects,
	97	function(k) wavkerfun(obj = k,
	98	kerneltype = "gaussian",
	99	EPS = .Machine$double.eps))
	100	for(k in 1:K_ICAME)
	101	preds[[k]][q, ] <- predict.wavkerfun(obj = auxs[[k]], cent = "DIFF")$X
	102
	103	}
	104	top <- Sys.time()
	105	#rm(wks, selects, auxs, aux_Load, mat_ks, q, Q, wk_Load)
	106
	107
	108	preds <- lapply(preds, function(dd) {dd[dd<0] <- 0; dd} )
	109	# Compute the prediction of synchrone by the sum of individuals
	110	pred_Load_sum <- Reduce('+', preds)
	111
	112
	113	vect_preds <- data.frame(lapply(preds, function(dd) c(t(dd))))
	114	names(vect_preds) <- paste0("icame", 1:K_ICAME)
	115
	116	vect_Load <- c(t(pred_Load))
	117	vect_gtop <- matrix(NA_real_, ncol = K_ICAME, nrow = length(vect_Load))
	118
	119
	120	for(tt in seq_along(vect_gtop)) {
	121	VALID <- !any(is.na(vect_preds[tt, ])) & # compute gtop only when no NAs
	122	tt < 17511 # these rows present a problem ==> why? (matbe too much 0s in preds_indiv)
	123	if(VALID) {
	124	res <- gtop(preds_indiv = as.numeric(vect_preds[tt, ]),
	125	pred_total = vect_Load[tt],
	126	weights_indiv = rep(1, K_ICAME),
	127	weight_total = 1,
	128	bounds_indiv = 0.05 * as.numeric(vect_preds[tt, ]))
	129	vect_gtop[tt, ] <- res$preds_indiv[1:K_ICAME] # last value is the sum
	130	}
	131	}
	132
	133	vect_gtop_sum <- rowSums(vect_gtop)
	134	pred_gtop_sum <- matrix(vect_gtop_sum, ncol = 48, byrow = TRUE)
	135
	136	## 4. Analisis of the predictions ####
	137	resids_Load <- pred_Load - mat_Load[grid + 1, ] # i.e. reference days + 1
	138	resids_Load_sum <- pred_Load_sum - mat_Load[grid + 1, ] # i.e. reference days + 1
	139	resids_gtop_sum <- pred_gtop_sum - mat_Load[grid + 1, ]
	140
	141	mape_Load <- 100 * rowMeans(abs(resids_Load / mat_Load[grid + 1, ]))
	142	rmse_Load <- sqrt(rowMeans(resids_Load^2))
	143
	144	mape_Load_sum <- 100 * rowMeans(abs(resids_Load_sum / mat_Load[grid + 1, ]))
	145	rmse_Load_sum <- sqrt(rowMeans(resids_Load_sum^2))
	146
	147	mape_gtop_sum <- 100 * rowMeans(abs(resids_gtop_sum / mat_Load[grid + 1, ]))
	148	rmse_gtop_sum <- sqrt(rowMeans(resids_gtop_sum^2))
	149
	150
	151	mean(mape_Load, na.rm = TRUE)
	152	mean(mape_Load_sum, na.rm = TRUE)
	153	mean(mape_gtop_sum, na.rm = TRUE)
	154	#print(mean(mape_Load, na.rm = TRUE))
	155	#print(mean(mape_Load_sum, na.rm = TRUE))
	156
	157
	158	mean(rmse_Load, na.rm = TRUE)
	159	mean(rmse_Load_sum, na.rm = TRUE)
	160	mean(rmse_gtop_sum, na.rm = TRUE)