From: Benjamin Auder Date: Mon, 24 Apr 2017 19:19:45 +0000 (+0200) Subject: revise package structure: always predict from 1am to horizon, dataset not cut at... X-Git-Url: https://git.auder.net/game/%7B%7B%20asset%28%27mixstore/css/user/pieces/cq.svg?a=commitdiff_plain;h=d2ab47a744d8fb29c03a76a7ca2368dae53f9a57;p=talweg.git revise package structure: always predict from 1am to horizon, dataset not cut at predict_at --- diff --git a/Etude_En_Cours_R_E_bis.R b/Etude_En_Cours_R_E_bis.R new file mode 100644 index 0000000..69b1da0 --- /dev/null +++ b/Etude_En_Cours_R_E_bis.R @@ -0,0 +1,136 @@ +setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno") +rm(list=ls()) + +# Lecture des données +pm = read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=";",dec=".",header=T) #,row.names=1) +n = dim(pm)[1] +datedebut = "10/12/2008" + +# Chargement des données météo et indicateurs +VarExp <- read.table("DATA/meteo_extra_jourMois.csv",sep=";",dec=".",header=T) +VarExp <- VarExp[-1,] + +# Lecture des dates +dates = read.table("DATA/Dates_jours.csv",header=F,as.is=T) +dates = dates[,1] + +# Contruction des matrices de données +pm.h <- matrix(pm[,2],ncol=24,byrow=TRUE) + +Nlignes = nrow(pm.h) +Data = cbind(pm.h[1:(Nlignes -1), ], pm.h[2:Nlignes, ]) +dates2 = dates[2:Nlignes] +rownames(Data) = dates2 +# df contient l'ensemble des données. +#df <- cbind(Data,varexp[,-1]) +df <- Data +PMjour <- apply(df[,25:48],1,mean,na.rm=T) +dfexp <- cbind(VarExp,PMjour) + +Dates = c( +"16/03/2015", +"19/01/2015", +"27/04/2015") + +Categorie = c("Epandage", "Chauffage", "Non Polluée") + +RepFig = "FIGURES_Etude" + +ResDates = NULL + +nbvois=10 +j=1 # numéro de semaine +ij=6 # numéro du jour (0 = lundi) + +Err24 = NULL +ErrPrev = NULL +Kvois = NULL + +for (Hc in 5:24) { + +H=24+Hc +fen=H +#fen=12 ou 24 ou H +L = (H-fen+1):H + +# Premier conditionnement : mois +indcond <- dfexp[,"Mois"] == 2 | dfexp[,"Mois"] == 3 | dfexp[,"Mois"] == 4 | dfexp[,"Mois"] == 9 | dfexp[,"Mois"] == 10 +data = df[indcond,] +varexp = dfexp[indcond,] + +nl = (1:nrow(data))[rownames(data)==Dates[j]] +dateJPrev = rownames(data)[nl+ij] +dataj = as.numeric(data[nl+ij, 1:48]) +data = data[-(nl + ij), ] +varexp = varexp[-(nl + ij), ] +indNA = attr(na.omit(data[, 1:48]),"na.action") +data = data[-indNA,] +varexp = varexp[-indNA,] + +# Conditionnement : les jours avec PMjour +/- large +large = 1 +bornes = mean(dataj[25:48])+c(-large,large) +indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2] +data = data[indcond,] +varexp = varexp[indcond,] + +D = rep(0,nrow(data)) +for (k in 1:nrow(data)) { + #D[k] = sqrt(sum((1:H)*(dataj[L] -data[k,L])^2)) + D[k] = sqrt(sum((dataj[L] -data[k,L])^2)) +} +ind = order(D)[1:nbvois] +w = 1/(D[ind]^2) +w = w/sum(w) +W = w%o%rep(1,48) +JourMoy = apply(data[ind, 1:48], 2, mean) +#JourMoy = apply(W*data[ind, 1:48], 2, sum) +NomFile = paste("Voisins_Epandage_PMjour_Hc_",Hc,".png",sep="") +Titre = paste("Jour à prévoir : ",dateJPrev," - ", length(ind)," voisins",sep="") +#erreur = sqrt(sum((dataj[25:48] - JourMoy[25:48])^2)) +if(Hc==24){erreurPrev=NA}else{erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48]))} +erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48])) +png(NomFile) +matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind), + cex.axis=1.4, cex.main = 1.7, cex.lab=1.5, + xlab="Heures locales", ylab=paste0("PM10 - Erreurs = ",round(erreur24,1)," / ",round(erreurPrev,1)), main=Titre) +legend("top",rownames(data)[ind],ncol=2,lwd=1.4, lty=1, col=1:length(ind)) +lines(1:48, dataj, lwd=2.5) +lines(JourMoy, lty = 2, lwd=2) +abline(v=c(24.5,H+0.5), lty = 2, lwd=1.2) +xx=dev.off() + + + + Err24 = c(Err24, erreur24) + ErrPrev = c(ErrPrev, erreurPrev) +ResDates = cbind(ResDates, rownames(data)[ind]) +} + + +rownames(ResDates) = 1:10 + +Kvois = NULL +for (Col in ncol(ResDates):1) { +K = 0 +for (I in 1:10){ + for (J in 1:10){ + if (ResDates[I,1] == ResDates[J,Col]) K = K +1 + } +} +Kvois = c(Kvois, K) +} + +# pdf("Erreur_Epandage_PMjour.pdf") +ymin = min(na.omit(ErrPrev), Err24) +ymin = min(ymin, Kvois) +ymax = max(na.omit(ErrPrev), Err24) +ymax = max(ymax, Kvois) +plot(5:24,Err24, type = "l", lwd = 2, cex.axis=1.4, cex.lab = 1.5, + ylab = "MAE",xlab = "Heures de prévision", ylim= c(ymin,ymax)) +lines(5:24,ErrPrev, lwd=2, lty = 2) +legend("topright", legend=c("Erreur 24h", "Erreur prévision"), lty = c(1,2), lwd=2, cex=1.5) +points(5:24, Kvois, cex=1.8, pch = 19) +# xx = dev.off() + +length(D) diff --git a/pkg/R/Data.R b/pkg/R/Data.R index 33db5c9..39e34de 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -2,53 +2,47 @@ #' #' Data encapsulation as a list (days) of lists (components). #' -#' The private field .data is a list where each cell contains the variables for a period -#' of time of 24 hours, from time P+1 until P the next day where P is an integer between -#' 0 and 23. .data[[1]] refers to the first measured data (serie and exogenous -#' variables): the corresponding times can be accessed through the function -#' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots, -#' as described in the 'field' section. +#' The private field .tv is a list where each cell contains the hourly variables for a +#' period of time of 24 hours, from 1am to next midnight. The other lists contain +#' informations on series' levels and exogenous variables (both measured and predicted). #' #' @usage # Data$new() #' -#' @field .data[[i]] List of +#' @field .tv List of "time-values"; in each cell: #' \itemize{ #' \item time: vector of times -#' \item centered_serie: centered serie -#' \item level: corresponding level -#' \item exo: exogenous variables -#' \item exo_hat: predicted exogenous variables (for next day) +#' \item serie: (measured) serie #' } +#' @field .level Vector of measured levels +#' @field .level_hat Vector of predicted levels +#' @field .exo List of measured exogenous variables, cell = numerical vector. +#' @field .exo_hat List of predicted exogenous variables, cell = numerical vector. #' #' @section Methods: #' \describe{ #' \item{\code{getSize()}}{ #' Number of series in dataset.} -#' \item{\code{getStdHorizon()}}{ -#' Number of time steps from serie[1] until midnight} -#' \item{\code{append(time, serie, exo, exo_hat)}}{ +#' \item{\code{append(time, value, level_hat, exo, exo_hat)}}{ #' Measured data for given vector of times + exogenous predictions from #' last midgnight.} #' \item{\code{getTime(index)}}{ #' Times (vector) at specified index.} -#' \item{\code{getCenteredSerie(index)}}{ -#' Centered serie at specified index.} -#' \item{\code{getCenteredSeries(indices)}}{ -#' Centered series at specified indices (in columns).} -#' \item{\code{getLevel(index)}}{ -#' Level at specified index.} #' \item{\code{getSerie(index)}}{ #' Serie (centered+level) at specified index.} #' \item{\code{getSeries(indices)}}{ #' Series at specified indices (in columns).} -#' \item{\code{getExoHat(index)}}{ -#' Predicted exogenous variables at specified index.} +#' \item{\code{getLevel(index)}}{ +#' Measured level at specified index.} +#' \item{\code{getLevelHat(index)}}{ +#' Predicted level at specified index.} +#' \item{\code{getCenteredSerie(index)}}{ +#' Centered serie at specified index.} +#' \item{\code{getCenteredSeries(indices)}}{ +#' Centered series at specified indices (in columns).} #' \item{\code{getExo(index)}}{ #' Measured exogenous variables at specified index.} -#' \item{\code{removeFirst()}}{ -#' Remove first list element (if truncated).} -#' \item{\code{removeLast()}}{ -#' Remove last list element (if truncated).} +#' \item{\code{getExoHat(index)}}{ +#' Predicted exogenous variables at specified index.} #' } #' #' @docType class @@ -56,66 +50,87 @@ #' Data = R6::R6Class("Data", private = list( - .data = list() + .tv = list(), + .level = vector("double",0), + .level_hat = vector("double",0), + .exo = list(), + .exo_hat = list() ), public = list( getSize = function() - length(private$.data) - , - getStdHorizon = function() - 24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1 + length(private$.tv) , - append = function(time, serie, exo, exo_hat) + append = function(time=NULL, value=NULL, level_hat=NULL, exo=NULL, exo_hat=NULL) { - level = mean(serie, na.rm=TRUE) - centered_serie = serie - level - private$.data[[length(private$.data)+1]] <- list( - "time"=time, #H-24 --> H-1 - "centered_serie"=centered_serie, #at 'time' - "level"=level, #at 'time' - "exo"=exo, #at 'time' (yersteday 0am to last midnight) - "exo_hat"=exo_hat) #today 0am to next midnight + if (!is.null(time) && !is.null(value)) + { + L = length(private$.tv) + if (L == 0 || strftime( tail(private$.tv[[L]]$time,1), + format="%H:%M:%S", tz="GMT" ) == "00:00:00") + { + # Append a new cell + private$.tv[[L+1]] <- list("time"=time, "serie"=value) + } + else + { + # Complete current cell + private$.tv[[L]]$time = c(private$.tv[[L]]$time, time) + private$.tv[[L]]$serie = c(private$.tv[[L]]$serie, value) + } + } + if (strftime( tail(private$.tv[[length(private$.tv)]]$time,1), + format="%H:%M:%S", tz="GMT" ) == "00:00:00") + { + private$.level = c(private$.level, + mean(private$.tv[[length(private$.tv)]]$serie, na.rm=TRUE)) + } + if (!is.null(level_hat)) + private$.level_hat = c(private$.level_hat, level_hat) + if (!is.null(exo)) + private$.exo[[length(private$.exo)+1]] = exo + if (!is.null(exo_hat)) + private$.exo_hat[[length(private$.exo_hat)+1]] = exo_hat }, getTime = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$time + private$.tv[[index]]$time }, - getCenteredSerie = function(index) + getSerie = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$centered_serie + private$.tv[[index]]$serie }, - getCenteredSeries = function(indices) - sapply(indices, function(i) self$getCenteredSerie(i)) + getSeries = function(indices) + sapply(indices, function(i) self$getSerie(i)) , getLevel = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$level + private$.level[index] }, - getSerie = function(index) + getLevelHat = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$centered_serie + private$.data[[index]]$level + private$.level_hat[index] }, - getSeries = function(indices) - sapply(indices, function(i) self$getSerie(i)) - , - getExoHat = function(index) + getCenteredSerie = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$exo_hat + private$.tv[[index]]$serie - private$.level[index] }, + getCenteredSeries = function(indices) + sapply(indices, function(i) self$getCenteredSerie(i)) + , getExo = function(index) { index = dateIndexToInteger(index, self) - private$.data[[index]]$exo + private$.exo[[index]] }, - removeFirst = function() - private$.data <- private$.data[2:length(private$.data)] - , - removeLast = function() - private$.data <- private$.data[1:(length(private$.data)-1)] + getExoHat = function(index) + { + index = dateIndexToInteger(index, self) + private$.exo_hat[[index]] + } ) ) diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index d0b40b4..6cd2d6e 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -17,25 +17,23 @@ AverageForecaster = R6::R6Class("AverageForecaster", inherit = Forecaster, public = list( - predictShape = function(data, today, memory, horizon, ...) + predictShape = function(data, today, memory, predict_from, horizon, ...) { - avg = rep(0., horizon) + avg = rep(0., (horizon-predict_from+1)) first_day = max(1, today-memory) - index = today-7 + 1 + index <- today nb_no_na_series = 0 repeat { - { - serie_on_horizon = data$getCenteredSerie(index)[1:horizon] - index = index - 7 - }; + index = index - 7 + if (index < first_day) + break + serie_on_horizon = data$getCenteredSerie(index)[predict_from:horizon] if (!any(is.na(serie_on_horizon))) { avg = avg + serie_on_horizon nb_no_na_series = nb_no_na_series + 1 - }; - if (index < first_day) - break + } } avg / nb_no_na_series } diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 1437442..ea18bb6 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -4,14 +4,15 @@ #' where days in the past are chosen and weighted according to some similarity measures. #' #' The main method is \code{predictShape()}, taking arguments data, today, memory, -#' horizon respectively for the dataset (object output of \code{getData()}), the current -#' index, the data depth (in days) and the number of time steps to forecast. +#' predict_from, horizon respectively for the dataset (object output of +#' \code{getData()}), the current index, the data depth (in days), the first predicted +#' hour and the last predicted hour. #' In addition, optional arguments can be passed: #' \itemize{ #' \item local : TRUE (default) to constrain neighbors to be "same days within same #' season" #' \item simtype : 'endo' for a similarity based on the series only, -#' 'exo' for a similaruty based on exogenous variables only, +#' 'exo' for a similarity based on exogenous variables only, #' 'mix' for the product of 'endo' and 'exo', #' 'none' (default) to apply a simple average: no computed weights #' \item window : A window for similarities computations; override cross-validation @@ -38,17 +39,20 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", inherit = Forecaster, public = list( - predictShape = function(data, today, memory, horizon, ...) + predictShape = function(data, today, memory, predict_from, horizon, ...) { # (re)initialize computed parameters private$.params <- list("weights"=NA, "indices"=NA, "window"=NA) # Do not forecast on days with NAs (TODO: softer condition...) - if (any(is.na(data$getCenteredSerie(today)))) + if (any(is.na(data$getSerie(today-1))) + || any(is.na(data$getSerie(today)[1:(predict_from-1)]))) + { return (NA) + } # Determine indices of no-NAs days followed by no-NAs tomorrows - fdays = .getNoNA2(data, max(today-memory,1), today-1) + fdays = .getNoNA2(data, max(today-memory,1), today-2) # Get optional args local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season? @@ -56,7 +60,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", if (hasArg("window")) { return ( private$.predictShapeAux(data, - fdays, today, horizon, local, list(...)$window, simtype, TRUE) ) + fdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) ) } # Indices of similar days for cross-validation; TODO: 20 = magic number @@ -71,13 +75,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", for (i in seq_along(cv_days)) { # mix_strategy is never used here (simtype != "mix"), therefore left blank - prediction = private$.predictShapeAux(data, fdays, cv_days[i], horizon, local, - window, simtype, FALSE) + prediction = private$.predictShapeAux(data, fdays, cv_days[i], predict_from, + horizon, local, window, simtype, FALSE) if (!is.na(prediction[1])) { nb_jours = nb_jours + 1 error = error + - mean((data$getSerie(cv_days[i]+1)[1:horizon] - prediction)^2) + mean((data$getSerie(cv_days[i]+1)[predict_from:horizon] - prediction)^2) } } return (error / nb_jours) @@ -105,14 +109,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", else #none: value doesn't matter 1 - return(private$.predictShapeAux(data, fdays, today, horizon, local, - best_window, simtype, TRUE)) + return( private$.predictShapeAux(data, fdays, today, predict_from, horizon, local, + best_window, simtype, TRUE) ) } ), private = list( # Precondition: "today" is full (no NAs) - .predictShapeAux = function(data, fdays, today, horizon, local, window, simtype, - final_call) + .predictShapeAux = function(data, fdays, today, predict_from, horizon, local, window, + simtype, final_call) { fdays_cut = fdays[ fdays < today ] if (length(fdays_cut) <= 1) @@ -135,7 +139,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", private$.params$indices <- fdays private$.params$window <- 1 } - return ( data$getSerie(fdays[1])[1:horizon] ) + return ( data$getSerie(fdays[1]+1)[predict_from:horizon] ) } } else @@ -147,10 +151,10 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", window_endo = ifelse(simtype=="mix", window[1], window) # Distances from last observed day to days in the past - serieToday = data$getSerie(today) + lastSerie = c( data$getSerie(today-1), data$getSerie(today)[1:(predict_from-1)] ) distances2 = sapply(fdays, function(i) { - delta = serieToday - data$getSerie(i) - mean(delta^2) + delta = lastSerie - c(data$getSerie(i),data$getSerie(i+1)[1:(predict_from-1)]) + sqrt(mean(delta^2)) }) simils_endo <- .computeSimils(distances2, window_endo) @@ -161,12 +165,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", # Compute exogen similarities using given window window_exo = ifelse(simtype=="mix", window[2], window) - M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) ) - M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) ) + M = matrix( ncol=1+length(fdays), nrow=1+length(data$getExo(1)) ) + M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) ) for (i in seq_along(fdays)) - M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) ) + M[,i+1] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) ) - sigma = cov(M) #NOTE: robust covariance is way too slow + sigma = cov(t(M)) #NOTE: robust covariance is way too slow # TODO: 10 == magic number; more robust way == det, or always ginv() sigma_inv = if (length(fdays) > 10) @@ -176,7 +180,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", # Distances from last observed day to days in the past distances2 = sapply(seq_along(fdays), function(i) { - delta = M[1,] - M[i+1,] + delta = M[,1] - M[,i+1] delta %*% sigma_inv %*% delta }) @@ -194,9 +198,12 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", rep(1, length(fdays)) similarities = similarities / sum(similarities) - prediction = rep(0, horizon) + prediction = rep(0, horizon-predict_from+1) for (i in seq_along(fdays)) - prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon] + { + prediction = prediction + + similarities[i] * data$getSerie(fdays[i]+1)[predict_from:horizon] + } if (final_call) { @@ -230,10 +237,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", # .getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12) { - levelToday = data$getLevel(today) - distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) - #TODO: 2, +3 : magic numbers - dist_thresh = 2 + levelToday = data$getLevelHat(today) + levelYersteday = data$getLevel(today-1) + distances = sapply(fdays, function(i) { + sqrt((data$getLevel(i)-levelYersteday)^2 + (data$getLevel(i+1)-levelToday)^2) + }) + #TODO: 1, +1, +3 : magic numbers + dist_thresh = 1 min_neighbs = min(min_neighbs,length(fdays)) repeat { @@ -241,15 +251,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", nb_neighbs = sum(same_pollution) if (nb_neighbs >= min_neighbs) #will eventually happen break - dist_thresh = dist_thresh + 3 + dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1) } fdays = fdays[same_pollution] max_neighbs = 12 if (nb_neighbs > max_neighbs) { # Keep only max_neighbs closest neighbors - fdays = fdays[ - sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] + fdays = fdays[ order(distances[same_pollution])[1:max_neighbs] ] } fdays } diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index dde8612..7035231 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -18,23 +18,21 @@ PersistenceForecaster = R6::R6Class("PersistenceForecaster", inherit = Forecaster, public = list( - predictShape = function(data, today, memory, horizon, ...) + predictShape = function(data, today, memory, predict_from, horizon, ...) { # Return centered last (similar) day curve, avoiding NAs until memory is run first_day = max(1, today-memory) same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE) - # If 'same_day', get the last known future of similar day: -7 + 1 == -6 - index = today - ifelse(same_day,6,0) + index <- today repeat { - { - last_serie = data$getCenteredSerie(index)[1:horizon] - index = index - ifelse(same_day,7,1) - }; - if (!any(is.na(last_serie))) - return (last_serie); + # If 'same_day', get the last known future of similar day + index = index - ifelse(same_day,7,1) if (index < first_day) return (NA) + last_serie = data$getCenteredSerie(index)[predict_from:horizon] + if (!any(is.na(last_serie))) + return (last_serie) } } ) diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 56bb4a5..4e1ae11 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -13,7 +13,7 @@ ZeroForecaster = R6::R6Class("ZeroForecaster", inherit = Forecaster, public = list( - predictShape = function(data, today, memory, horizon, ...) - rep(0, horizon) + predictShape = function(data, today, memory, predict_from, horizon, ...) + rep(0, (horizon-predict_from+1)) ) ) diff --git a/pkg/R/Forecast.R b/pkg/R/Forecast.R index 323ebfb..a983046 100644 --- a/pkg/R/Forecast.R +++ b/pkg/R/Forecast.R @@ -3,11 +3,9 @@ #' Forecast encapsulation as a list (days where prediction occur) of lists (components). #' #' The private field .pred is a list where each cell contains the predicted variables for -#' a period of time of H<=24 hours, from hour P+1 until P+H, where P+1 is taken right -#' after the end of the period designated by \code{getIndexInData()}. In other terms, -#' \code{forecast$getForecast(i)} return forecasts for \code{data$getSerie(i+1)}. -#' Each cell .pred[[i]] is itself a list containing three slots, as described in the -#' 'field' section. +#' a period of time of H<=24 hours, from hour P until P+H-1, where P == predict_from. +#' \code{forecast$getForecast(i)} output forecasts for +#' \code{data$getSerie(forecast$getIndexInData(i))}. #' #' @usage # Forecast$new(dates) #' diff --git a/pkg/R/Forecaster.R b/pkg/R/Forecaster.R index cefa1ea..5258760 100644 --- a/pkg/R/Forecaster.R +++ b/pkg/R/Forecaster.R @@ -52,15 +52,20 @@ Forecaster = R6::R6Class("Forecaster", private$.pjump <- pjump invisible(self) }, - predictSerie = function(data, today, memory, horizon, ...) + predictSerie = function(data, today, memory, predict_from, horizon, ...) { # Parameters (potentially) computed during shape prediction stage - predicted_shape = self$predictShape(data, today, memory, horizon, ...) - predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...) - # Predicted shape is aligned it on the end of current day + jump - predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta + predicted_shape = self$predictShape(data,today,memory,predict_from,horizon,...) + predicted_delta = private$.pjump(data, today, memory, predict_from, horizon, + private$.params, ...) + + # Predicted shape is aligned on the end of current day + jump + c( data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()], + predicted_shape - predicted_shape[1] + predicted_delta + + ifelse(predict_from>=2, + data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) ) }, - predictShape = function(data, today, memory, horizon, ...) + predictShape = function(data, today, memory, predict_from, horizon, ...) NULL #empty default implementation: to implement in inherited classes , getParameters = function() diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 40341d9..58453ea 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -10,7 +10,8 @@ #' #' @aliases J_Neighbors #' -getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) +getNeighborsJumpPredict = function(data, today, memory, predict_from, horizon, + params, ...) { first_day = max(1, today-memory) filter = (params$indices >= first_day) @@ -18,7 +19,10 @@ getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) weights = params$weights[filter] gaps = sapply(indices, function(i) { - head( data$getSerie(i+1),1 ) - tail( data$getSerie(i),1 ) + if (predict_from >= 2) + data$getSerie(i+1)[predict_from] - data$getSerie(i+1)[predict_from-1] + else + head(data$getSerie(i+1),1) - tail(data$getSerie(i),1) }) scal_product = weights * gaps norm_fact = sum( weights[!is.na(scal_product)] ) diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index 9e56742..8298a89 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -10,22 +10,25 @@ #' #' @aliases J_Persistence #' -getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) +getPersistenceJumpPredict = function(data, today, memory, predict_from, + horizon, params, ...) { #return gap between end of similar day curve and first day of tomorrow (in the past) first_day = max(1, today-memory) same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE) - index = today - ifelse(same_day,7,1) + index <- today repeat { - { - last_serie_end = tail( data$getSerie(index), 1) - last_tomorrow_begin = head( data$getSerie(index+1), 1) - index = index - ifelse(same_day,7,1) - }; - if (!is.na(last_serie_end) && !is.na(last_tomorrow_begin)) - return (last_tomorrow_begin - last_serie_end); + # If 'same_day', get the last known future of similar day + index = index - ifelse(same_day,7,1) if (index < first_day) return (NA) + gap <- + if (predict_from >= 2) + data$getSerie(index)[predict_from] - data$getSerie(index)[predict_from-1] + else + head(data$getSerie(index),1) - tail(data$getSerie(index-1),1) + if (!is.na(gap)) + return (gap) } } diff --git a/pkg/R/computeError.R b/pkg/R/computeError.R index 3aa028f..0b1771f 100644 --- a/pkg/R/computeError.R +++ b/pkg/R/computeError.R @@ -13,7 +13,7 @@ #' step, averaged on the L forecasting days. #' #' @export -computeError = function(data, pred, horizon=data$getStdHorizon()) +computeError = function(data, pred, predict_from, horizon=length(data$getSerie(1))) { L = pred$getSize() mape_day = rep(0, horizon) @@ -25,8 +25,8 @@ computeError = function(data, pred, horizon=data$getStdHorizon()) for (i in seq_len(L)) { index = pred$getIndexInData(i) - serie = data$getSerie(index+1)[1:horizon] - forecast = pred$getForecast(i)[1:horizon] + serie = data$getSerie(index)[predict_from:horizon] + forecast = pred$getForecast(i)[predict_from:horizon] if (!any(is.na(serie)) && !any(is.na(forecast))) { nb_no_NA_data = nb_no_NA_data + 1 diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index a4a539a..e967cc7 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -49,13 +49,16 @@ #' #do_something_with_pred #' }} #' @export -computeForecast = function(data, indices, forecaster, pjump, - memory=Inf, horizon=data$getStdHorizon(), ncores=3, ...) +computeForecast = function(data, indices, forecaster, pjump, predict_from, + memory=Inf, horizon=length(data$getSerie(1)), ncores=3, ...) { # (basic) Arguments sanity checks + predict_from = as.integer(predict_from)[1] + if (! predict_from %in% 1:length(data$getSerie(1))) + stop("predict_from in [1,24] (hours)") horizon = as.integer(horizon)[1] - if (horizon<=0 || horizon>length(data$getCenteredSerie(1))) - stop("Horizon too short or too long") + if (horizon<=predict_from || horizon>length(data$getSerie(1))) + stop("Horizon in [predict_from+1,24] (hours)") integer_indices = sapply(indices, function(i) dateIndexToInteger(i,data)) if (any(integer_indices<=0 | integer_indices>data$getSize())) stop("Indices out of range") @@ -73,7 +76,7 @@ computeForecast = function(data, indices, forecaster, pjump, p <- parallel::mclapply(seq_along(integer_indices), function(i) { list( "forecast" = forecaster$predictSerie( - data, integer_indices[i], memory, horizon, ...), + data, integer_indices[i], memory, predict_from, horizon, ...), "params"= forecaster$getParameters(), "index" = integer_indices[i] ) }, mc.cores=ncores) @@ -83,7 +86,7 @@ computeForecast = function(data, indices, forecaster, pjump, p <- lapply(seq_along(integer_indices), function(i) { list( "forecast" = forecaster$predictSerie( - data, integer_indices[i], memory, horizon, ...), + data, integer_indices[i], memory, predict_from, horizon, ...), "params"= forecaster$getParameters(), "index" = integer_indices[i] ) }) diff --git a/pkg/R/getData.R b/pkg/R/getData.R index a6401f9..f1f8861 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -10,11 +10,8 @@ #' @param exo_data Exogenous variables, as a data frame or a CSV file; first column is #' dates, next block are measurements for the day, and final block are exogenous #' forecasts (for the same day). -#' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris") #' @param date_format How date/time are stored (e.g. year/month/day hour:minutes; #' see ?strptime) -#' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris") -#' @param predict_at When does the prediction take place? Integer, in hours. Default: 0 #' @param limit Number of days to extract (default: Inf, for "all") #' #' @return An object of class Data @@ -24,14 +21,9 @@ #' exo_data = read.csv(system.file("extdata","meteo_extra_noNAs.csv",package="talweg")) #' data = getData(ts_data, exo_data, limit=120) #' @export -getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%M", - working_tz="GMT", predict_at=0, limit=Inf) +getData = function(ts_data, exo_data, date_format="%d/%m/%Y %H:%M", limit=Inf) { # Sanity checks (not full, but sufficient at this stage) - if (!is.character(input_tz) || !is.character(working_tz)) - stop("Bad timezone (see ?timezone)") - input_tz = input_tz[1] - working_tz = working_tz[1] if ( (!is.data.frame(ts_data) && !is.character(ts_data)) || (!is.data.frame(exo_data) && !is.character(exo_data)) ) stop("Bad time-series / exogenous input (data frame or CSV file)") @@ -39,22 +31,20 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% ts_data = ts_data[1] if (is.character(exo_data)) exo_data = exo_data[1] - predict_at = as.integer(predict_at)[1] - if (predict_at<0 || predict_at>23) - stop("Bad predict_at (0-23)") if (!is.character(date_format)) stop("Bad date_format (character)") date_format = date_format[1] + if (!is.numeric(limit) || limit < 0) + stop("limit: positive integer") ts_df = if (is.character(ts_data)) read.csv(ts_data) else ts_data - # Convert to the desired timezone (usually "GMT" or "Europe/Paris") - formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz) - ts_df[,1] = format( - as.POSIXct(formatted_dates_POSIXlt, tz=input_tz), tz=working_tz, usetz=TRUE) + # Convert to GMT (pretend it's GMT; no impact) + dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz="GMT") + ts_df[,1] = format(as.POSIXct(dates_POSIXlt, tz="GMT"), tz="GMT", usetz=TRUE) exo_df = if (is.character(exo_data)) @@ -80,29 +70,18 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% line = line + 1 }; if (line >= nb_lines + 1 - || as.POSIXlt(ts_df[line-1,1],tz=working_tz)$hour == predict_at) + || as.POSIXlt(ts_df[line-1,1],tz="GMT")$hour == 0) { break } } - exo = as.data.frame( exo_df[i,2:(1+nb_exos)] ) - exo_hat = - if (i < nrow(exo_df)) - as.data.frame( exo_df[i+1,(1+nb_exos+1):(1+2*nb_exos)] ) - else - NA #exogenous prediction for next day are useless on last day - data$append(time, serie, exo, exo_hat) + # TODO: 2 modes, "operational" and "testing"; would need PM10 predictions + data$append(time=time, value=serie, level_hat=mean(serie,na.rm=TRUE), + exo=exo_df[i,2:(1+nb_exos)], exo_hat=exo_df[i,(1+nb_exos+1):(1+2*nb_exos)]) if (i >= limit) break i = i + 1 } - if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2))) - data$removeFirst() - if (length(data$getCenteredSerie(data$getSize())) - < length(data$getCenteredSerie(data$getSize()-1))) - { - data$removeLast() - } data } diff --git a/pkg/R/plot.R b/pkg/R/plot.R index fe2fb4e..59a26a7 100644 --- a/pkg/R/plot.R +++ b/pkg/R/plot.R @@ -82,9 +82,8 @@ plotError <- function(err, cols=seq_along(err)) #' @export plotPredReal <- function(data, pred, index) { - horizon = length(pred$getForecast(1)) - measure = data$getSerie( pred$getIndexInData(index)+1 )[1:horizon] prediction = pred$getForecast(index) + measure = data$getSerie( pred$getIndexInData(index) )[length(prediction)] yrange = range(measure, prediction) par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5, lwd=3) plot(measure, type="l", ylim=yrange, xlab="Time (hours)", ylab="PM10") @@ -144,6 +143,7 @@ plotFbox <- function(data, indices=seq_len(data$getSize())) #' @param index Index in forecast (integer or date) #' @param limit Number of neighbors to consider #' @param plot Should the result be plotted? +#' @param predict_from First prediction instant #' #' @return A list with #' \itemize{ @@ -153,10 +153,9 @@ plotFbox <- function(data, indices=seq_len(data$getSize())) #' } #' #' @export -computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) +computeFilaments <- function(data, pred, index, predict_from, limit=60, plot=TRUE) { - ref_serie = data$getCenteredSerie( pred$getIndexInData(index) ) - if (any(is.na(ref_serie))) + if (is.null(pred$getParams(index)$weights) || is.na(pred$getParams(index)$weights[1])) stop("computeFilaments requires a serie without NAs") # Compute colors for each neighbor (from darkest to lightest) @@ -170,7 +169,8 @@ computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) if (plot) { # Complete series with (past and present) tomorrows - ref_serie = c(ref_serie, data$getCenteredSerie( pred$getIndexInData(index)+1 )) + ref_serie = c( data$getCenteredSerie( pred$getIndexInData(index)-1 ), + data$getCenteredSerie( pred$getIndexInData(index) ) ) centered_series = rbind( data$getCenteredSeries( pred$getParams(index)$indices ), data$getCenteredSeries( pred$getParams(index)$indices+1 ) ) @@ -185,7 +185,7 @@ computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) } # Also plot ref curve, in red plot(ref_serie, ylim=yrange, type="l", col="#FF0000", xlab="", ylab="") - abline(v=24, lty=2, col=colors()[56], lwd=1) + abline(v=24+predict_from-0.5, lty=2, col=colors()[56], lwd=1) } list( @@ -202,7 +202,7 @@ computeFilaments <- function(data, pred, index, limit=60, plot=TRUE) #' @param fil Output of \code{computeFilaments} #' #' @export -plotFilamentsBox = function(data, fil) +plotFilamentsBox = function(data, fil, predict_from) { if (!requireNamespace("rainbow", quietly=TRUE)) stop("Functional boxplot requires the rainbow package") @@ -220,7 +220,7 @@ plotFilamentsBox = function(data, fil) par(new=TRUE) plot(c(data$getSerie(fil$index),data$getSerie(fil$index+1)), type="l", lwd=2, lty=2, ylim=c(usr[3] + yr, usr[4] - yr), xlab="", ylab="") - abline(v=24, lty=2, col=colors()[56]) + abline(v=24+predict_from-0.5, lty=2, col=colors()[56]) } #' Plot relative conditional variability / absolute variability @@ -232,7 +232,7 @@ plotFilamentsBox = function(data, fil) #' @inheritParams plotFilamentsBox #' #' @export -plotRelVar = function(data, fil) +plotRelVar = function(data, fil, predict_from) { ref_var = c( apply(data$getSeries(fil$neighb_indices),1,sd), apply(data$getSeries(fil$neighb_indices+1),1,sd) ) @@ -247,5 +247,5 @@ plotRelVar = function(data, fil) xlab="Time (hours)", ylab="Standard deviation") par(new=TRUE) plot(global_var, type="l", col=2, lwd=3, ylim=yrange, xlab="", ylab="") - abline(v=24, lty=2, col=colors()[56]) + abline(v=24+predict_from-0.5, lty=2, col=colors()[56]) } diff --git a/pkg/R/utils.R b/pkg/R/utils.R index b0d0ae0..96ec601 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -127,6 +127,6 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) .getNoNA2 = function(data, first, last) { (first:last)[ sapply(first:last, function(i) - !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) + !any( is.na(data$getSerie(i)) | is.na(data$getSerie(i+1)) ) ) ] } diff --git a/pkg/tests/testthat/helper.R b/pkg/tests/testthat/helper.R index 491cf9c..af98c0b 100644 --- a/pkg/tests/testthat/helper.R +++ b/pkg/tests/testthat/helper.R @@ -25,7 +25,7 @@ getDataTest = function(n) time = as.POSIXct((i-1)*60*60*24+15*60*(1:96), origin="2007-01-01", tz="GMT") exo = runif(4) exo_hat = runif(4) - data$append(time, serie, exo, exo_hat) + data$append(time=time, value=serie, exo=exo, exo_hat=exo_hat) } data } diff --git a/pkg/tests/testthat/test-DateIntegerConv.R b/pkg/tests/testthat/test-DateIntegerConv.R index 99e5fa5..b3d12fb 100644 --- a/pkg/tests/testthat/test-DateIntegerConv.R +++ b/pkg/tests/testthat/test-DateIntegerConv.R @@ -2,10 +2,8 @@ context("Date <--> integer conversions") ts_data = system.file("testdata","ts_test.csv",package="talweg") exo_data = system.file("testdata","exo_test.csv",package="talweg") -data0 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", - working_tz="GMT", predict_at=0, limit=Inf) -data7 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", - working_tz="GMT", predict_at=7, limit=Inf) +data0 <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf) +data7 <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf) test_that("dateIndexToInteger works as expected", { diff --git a/pkg/tests/testthat/test-Forecaster.R b/pkg/tests/testthat/test-Forecaster.R index 09b6f0a..78e387a 100644 --- a/pkg/tests/testthat/test-Forecaster.R +++ b/pkg/tests/testthat/test-Forecaster.R @@ -2,18 +2,15 @@ context("Check that forecasters behave as expected") ts_data = system.file("testdata","ts_test.csv",package="talweg") exo_data = system.file("testdata","exo_test.csv",package="talweg") -data00 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", - working_tz="GMT", predict_at=0, limit=Inf) -data13 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", - working_tz="GMT", predict_at=13, limit=Inf) -#Forecast at sunday to saturday (series 7 to 1), for monday to sunday (series 1 to 7) -indices <<- seq(as.Date("2007-04-01"),as.Date("2007-04-07"),"days") +data_p <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf) +#Forecasts from monday to sunday (series 1 to 7) +indices <<- seq(as.Date("2007-04-02"),as.Date("2007-04-08"),"days") pred_order = c(7,1:6) #will facilitate tests test_that("Average method behave as expected", { - pred00_z = computeForecast(data00, indices, "Average", "Zero", Inf, 24) - pred00_p = computeForecast(data00, indices, "Average", "Persistence", Inf, 24) + pred00_z = computeForecast(data_p, indices, "Average", "Zero", 1, Inf, 24, ncores=1) + pred00_p = computeForecast(data_p, indices, "Average", "Persistence", 1, Inf, 24) for (i in 1:7) { #zero jump: should predict true values minus 1 @@ -22,30 +19,30 @@ test_that("Average method behave as expected", expect_equal( pred00_p$getForecast(i), rep(i,24) ) } - #NOTE: days become + #NOTE: 24h-block become #1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 (14h-->0h then 1h-->13h) #No jump between days, thus zero and persistence are equivalent (and correct) - pred13_z = computeForecast(data13, indices, "Average", "Zero", Inf, 24) - pred13_p = computeForecast(data13, indices, "Average", "Persistence", Inf, 24) + pred13_z = computeForecast(data_p, indices, "Average", "Zero", 14, Inf, 24) + pred13_p = computeForecast(data_p, indices, "Average", "Persistence", 14, Inf, 24) for (i in 1:7) { - expect_equal( pred13_z$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal( pred13_p$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal( pred13_z$getForecast(i), rep(i,24) ) + expect_equal( pred13_p$getForecast(i), rep(i,24) ) } #A few extra checks - expect_equal( pred00_p$getIndexInData(2), dateIndexToInteger("2007-04-02",data00) ) - expect_equal( pred00_z$getIndexInData(2), dateIndexToInteger("2007-04-02",data00) ) - expect_equal( pred13_p$getIndexInData(5), dateIndexToInteger("2007-04-05",data13) ) - expect_equal( pred13_z$getIndexInData(5), dateIndexToInteger("2007-04-05",data13) ) + expect_equal( pred00_p$getIndexInData(2), dateIndexToInteger("2007-04-03",data_p) ) + expect_equal( pred00_z$getIndexInData(2), dateIndexToInteger("2007-04-03",data_p) ) + expect_equal( pred13_p$getIndexInData(5), dateIndexToInteger("2007-04-06",data_p) ) + expect_equal( pred13_z$getIndexInData(5), dateIndexToInteger("2007-04-06",data_p) ) }) test_that("Persistence method behave as expected", { #Situation A: +Zero; (generally) correct if jump, wrong otherwise - pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + pred00_sd = computeForecast(data_p, indices, "Persistence", "Zero", 1, Inf, 24, ncores=1, same_day=TRUE) - pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + pred00_dd = computeForecast(data_p, indices, "Persistence", "Zero", 1, Inf, 24, ncores=1, same_day=FALSE) for (i in 1:7) { @@ -53,25 +50,20 @@ test_that("Persistence method behave as expected", expect_equal(pred00_dd$getForecast(i), rep(pred_order[i],24)) } - pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + pred13_sd = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24, ncores=1, same_day=TRUE) - pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + pred13_dd = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24, ncores=1, same_day=FALSE) - for (i in 2:6) + for (i in 1:7) { - expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_sd$getForecast(i), rep(i,24) ) + expect_equal(pred13_dd$getForecast(i), rep(i,24) ) } - #boundaries are special cases: OK if same day, quite wrong otherwise - expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) ) - expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) ) - expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) ) - expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) ) #Situation B: +Persistence, (generally) correct - pred00_sd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24, + pred00_sd = computeForecast(data_p, indices, "Persistence", "Persistence", 1, Inf, 24, ncores=1, same_day=TRUE) - pred00_dd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24, + pred00_dd = computeForecast(data_p, indices, "Persistence", "Persistence", 1, Inf, 24, ncores=1, same_day=FALSE) for (i in 3:7) { @@ -84,55 +76,50 @@ test_that("Persistence method behave as expected", expect_equal(pred00_sd$getForecast(2), rep(2,24) ) expect_equal(pred00_dd$getForecast(2), rep(-5,24) ) - pred13_sd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24, + pred13_sd = computeForecast(data_p, indices, "Persistence", "Persistence", 14, Inf, 24, ncores=1, same_day=TRUE) - pred13_dd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24, + pred13_dd = computeForecast(data_p, indices, "Persistence", "Persistence", 14, Inf, 24, ncores=1, same_day=FALSE) - for (i in 2:6) + for (i in 1:7) { - expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) - expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13_sd$getForecast(i), rep(i,24) ) + expect_equal(pred13_dd$getForecast(i), rep(i,24) ) } - #boundaries are special cases: OK if same day, quite wrong otherwise - expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) ) - expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) ) - expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) ) - expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) ) #A few extra checks - expect_equal( pred00_sd$getIndexInData(3), dateIndexToInteger("2007-04-03",data00) ) - expect_equal( pred00_dd$getIndexInData(6), dateIndexToInteger("2007-04-06",data00) ) - expect_equal( pred13_sd$getIndexInData(3), dateIndexToInteger("2007-04-03",data13) ) - expect_equal( pred13_dd$getIndexInData(6), dateIndexToInteger("2007-04-06",data13) ) + expect_equal( pred00_sd$getIndexInData(3), dateIndexToInteger("2007-04-04",data_p) ) + expect_equal( pred00_dd$getIndexInData(6), dateIndexToInteger("2007-04-07",data_p) ) + expect_equal( pred13_sd$getIndexInData(3), dateIndexToInteger("2007-04-04",data_p) ) + expect_equal( pred13_dd$getIndexInData(6), dateIndexToInteger("2007-04-07",data_p) ) }) test_that("Neighbors method behave as expected", { #Situation A: +Zero; correct if jump, wrong otherwise - pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, + pred00 = computeForecast(data_p, indices, "Neighbors", "Zero", 1, Inf, 24, simtype="mix", local=FALSE) for (i in 1:7) expect_equal(pred00$getForecast(i), rep(pred_order[i],24)) - pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + pred13 = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24, simtype="mix", local=FALSE) for (i in 1:7) - expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) + expect_equal(pred13$getForecast(i), rep(i,24) ) #Situation B: +Neighbors == too difficult to eval in a unit test -# pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, +# pred00 = computeForecast(data_p, indices, "Neighbors", "Neighbors", 1, Inf, 24, # simtype="endo", local=FALSE) # jumps = ... # for (i in 1:7) # expect_equal(pred00$getForecast(i), rep(pred_order[i]+jumps[i],24)) -# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, +# pred13 = computeForecast(data_p, indices, "Neighbors", "Neighbors", 14, Inf, 24, # simtype="endo", local=FALSE) # for (i in 1:7) # expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) ) #A few extra checks - expect_equal( pred00$getIndexInData(1), dateIndexToInteger("2007-04-01",data00) ) - expect_equal( pred00$getIndexInData(4), dateIndexToInteger("2007-04-04",data00) ) - expect_equal( pred13$getIndexInData(1), dateIndexToInteger("2007-04-01",data13) ) - expect_equal( pred13$getIndexInData(4), dateIndexToInteger("2007-04-04",data13) ) + expect_equal( pred00$getIndexInData(1), dateIndexToInteger("2007-04-02",data_p) ) + expect_equal( pred00$getIndexInData(4), dateIndexToInteger("2007-04-05",data_p) ) + expect_equal( pred13$getIndexInData(1), dateIndexToInteger("2007-04-02",data_p) ) + expect_equal( pred13$getIndexInData(4), dateIndexToInteger("2007-04-05",data_p) ) }) diff --git a/pkg/tests/testthat/test-computeFilaments.R b/pkg/tests/testthat/test-computeFilaments.R index 7e1cafa..6ddd2c5 100644 --- a/pkg/tests/testthat/test-computeFilaments.R +++ b/pkg/tests/testthat/test-computeFilaments.R @@ -4,15 +4,15 @@ test_that("output is as expected on simulated series", { data = getDataTest(150) - # index 143 : serie type 2 - pred = computeForecast(data, 143, "Neighbors", "Zero", + # index 144-1 == 143 : serie type 2 + pred = computeForecast(data, 144, "Neighbors", "Zero", predict_from=8, horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1) - f = computeFilaments(data, pred, 1, limit=60, plot=FALSE) + f = computeFilaments(data, pred, 1, 8, limit=60, plot=FALSE) # Expected output: 50-3-10 series of type 2, then 23 series of type 3 (closest next) expect_identical(length(f$neighb_indices), as.integer(60)) expect_identical(length(f$colors), as.integer(60)) - expect_equal(f$index, 143) + expect_equal(f$index, 144) expect_true(all(I(f$neighb_indices) >= 2)) for (i in 1:37) { @@ -27,16 +27,16 @@ test_that("output is as expected on simulated series", expect_match(f$colors[1], "#1*") expect_match(f$colors[38], "#E*") - # index 142 : serie type 1 - pred = computeForecast(data, 142, "Neighbors", "Zero", + # index 143-1 == 142 : serie type 1 + pred = computeForecast(data, 143, "Neighbors", "Zero", predict_from=8, horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1) - f = computeFilaments(data, pred, 1, limit=50, plot=FALSE) + f = computeFilaments(data, pred, 1, 8, limit=50, plot=FALSE) # Expected output: 50-10-3 series of type 1, then 13 series of type 3 (closest next) # NOTE: -10 because only past days with no-NAs tomorrow => exclude type 1 in [60,90[ expect_identical(length(f$neighb_indices), as.integer(50)) expect_identical(length(f$colors), as.integer(50)) - expect_equal(f$index, 142) + expect_equal(f$index, 143) expect_true(all(I(f$neighb_indices) != 2)) for (i in 1:37) { diff --git a/reports/PackageR.gj b/reports/PackageR.gj index d62dc36..567bfd6 100644 --- a/reports/PackageR.gj +++ b/reports/PackageR.gj @@ -25,18 +25,17 @@ ts_data <- read.csv(system.file("extdata","pm10_mesures_H_loc.csv", package="talweg")) exo_data <- read.csv(system.file("extdata","meteo_extra_noNAs.csv", package="talweg")) -data <- getData(ts_data, exo_data, input_tz="GMT", - date_format="%d/%m/%Y %H:%M", working_tz="GMT", - predict_at=7, limit=120) +data <- getData(ts_data, exo_data, + date_format="%d/%m/%Y %H:%M", limit=120) # Plus de détails à la section 1 ci-après. # Prédiction de 10 courbes (jours 102 à 111) -pred <- computeForecast(data, 101:110, "Persistence", "Zero", memory=50, - horizon=12, ncores=1) +pred <- computeForecast(data, 101:110, "Persistence", "Zero", + predict_from=8, memory=50, horizon=24, ncores=1) # Plus de détails à la section 2 ci-après. # Calcul des erreurs (sur un horizon arbitraire <= horizon de prédiction) -err <- computeError(data, pred, horizon=6) +err <- computeError(data, pred, predict_from=8, horizon=20) # Plus de détails à la section 3 ci-après. # Puis voir ?plotError et les autres plot dans le paragraphe 'seealso' @@ -51,14 +50,9 @@ première colonne contient les heures, la seconde les valeurs. première colonne contient les jours, les $m$ suivantes les variables mesurées pour ce jour, et les $m$ dernières les variables prédites pour ce même jour. Dans notre cas $m=4$ : pression, température, gradient de température, vitesse du vent. - 3. **input_tz** : zone horaire pour ts_data (défaut : "GMT"). - 4. **date_format** : format des heures dans ts_data (défaut : "%d/%m/%Y %H:%M", format -du fichier transmis par Michel). - 5. **working_tz** : zone horaire dans laquelle on souhaite travailler avec les données -(défaut : "GMT"). - 6. **predict_at** : heure à laquelle s'effectue la prévision $-$ et donc dernière heure -d'un bloc de 24h, relativement à working_tz. data`$`getSerie(3) renvoit ainsi les 24 -valeurs de 8h à 7h pour le $3^{eme}$ bloc de 24h présent dans le jeu de données. + 3. **date_format** : format des heures dans ts_data (défaut : "%d/%m/%Y %H:%M", format +du fichier transmis par Michel Bobbia). + 4. **limit** : nombre de séries à récupérer. -----r print(data) #?Data @@ -76,10 +70,11 @@ blocs de 24h) ; peut être donnée sous forme d'un vecteur de dates ou d'entiers ?computeForecast 5. **memory** : le nombre de jours à prendre en compte dans le passé pour chaque prévision (par défaut : Inf, c'est-à-dire tout l'historique pris en compte). - 6. **horizon** : le nombre d'heures à prédire ; par défaut "data`$`getStdHorizon()", -c'est-à-dire le nombre d'heures restantes à partir de l'instant de prévision + 1 jusqu'à -minuit (17 pour predict_at=7 par exemple). - 7. **ncores** : le nombre de processus parallèles (utiliser 1 pour une exécution + 6. **predict_from** : première heure de prévision. Les séries prévues démarrent +cependant toutes à 1h du matin (en reprenant les premières valeurs connues). + 7. **horizon** : dernière heure de prévision ; maximum 24 == minuit (valeur par défaut). +pred`$`getForecast(i) retourne une journée complète de 01:00 à 00:00 si horizon=24. + 8. **ncores** : le nombre de processus parallèles (utiliser 1 pour une exécution séquentielle) -----r print(pred) @@ -91,9 +86,10 @@ Les arguments de cette fonction sont, dans l'ordre : 1. **data** : le jeu de données renvoyé par getData() 2. **pred** : les prédictions renvoyées par computeForecast() - 3. **horizon** : le nombre d'heures à considérer pour le calcul de l'erreur ; doit être -inférieur ou égal à l'horizon utilisé pour la prédiction (même valeur par défaut : -"data`$`getStdHorizon()") + 3. **predict_from** : première heure de prévision ; peut être différente de l'analogue +dans l'appel à *computeForecast()*. + 4. **horizon** : dernière heure de prévision à considérer pour le calcul de l'erreur ; +inférieure ou égale à la valeur de l'argument analogue dans *computeForecast()* -----r summary(err) summary(err$abs)