--- /dev/null
+setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno")
+rm(list=ls())
+
+# Lecture des données
+pm = read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=";",dec=".",header=T) #,row.names=1)
+n = dim(pm)[1]
+datedebut = "10/12/2008"
+
+# Chargement des données météo et indicateurs
+VarExp <- read.table("DATA/meteo_extra_jourMois.csv",sep=";",dec=".",header=T)
+VarExp <- VarExp[-1,]
+
+# Lecture des dates
+dates = read.table("DATA/Dates_jours.csv",header=F,as.is=T)
+dates = dates[,1]
+
+# Contruction des matrices de données
+pm.h <- matrix(pm[,2],ncol=24,byrow=TRUE)
+
+Nlignes = nrow(pm.h)
+Data = cbind(pm.h[1:(Nlignes -1), ], pm.h[2:Nlignes, ])
+dates2 = dates[2:Nlignes]
+rownames(Data) = dates2
+# df contient l'ensemble des données.
+#df <- cbind(Data,varexp[,-1])
+df <- Data
+PMjour <- apply(df[,25:48],1,mean,na.rm=T)
+dfexp <- cbind(VarExp,PMjour)
+
+Dates = c(
+"16/03/2015",
+"19/01/2015",
+"27/04/2015")
+
+Categorie = c("Epandage", "Chauffage", "Non Polluée")
+
+RepFig = "FIGURES_Etude"
+
+ResDates = NULL
+
+nbvois=10
+j=1 # numéro de semaine
+ij=6 # numéro du jour (0 = lundi)
+
+Err24 = NULL
+ErrPrev = NULL
+Kvois = NULL
+
+for (Hc in 5:24) {
+
+H=24+Hc
+fen=H
+#fen=12 ou 24 ou H
+L = (H-fen+1):H
+
+# Premier conditionnement : mois
+indcond <- dfexp[,"Mois"] == 2 | dfexp[,"Mois"] == 3 | dfexp[,"Mois"] == 4 | dfexp[,"Mois"] == 9 | dfexp[,"Mois"] == 10
+data = df[indcond,]
+varexp = dfexp[indcond,]
+
+nl = (1:nrow(data))[rownames(data)==Dates[j]]
+dateJPrev = rownames(data)[nl+ij]
+dataj = as.numeric(data[nl+ij, 1:48])
+data = data[-(nl + ij), ]
+varexp = varexp[-(nl + ij), ]
+indNA = attr(na.omit(data[, 1:48]),"na.action")
+data = data[-indNA,]
+varexp = varexp[-indNA,]
+
+# Conditionnement : les jours avec PMjour +/- large
+large = 1
+bornes = mean(dataj[25:48])+c(-large,large)
+indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+data = data[indcond,]
+varexp = varexp[indcond,]
+
+D = rep(0,nrow(data))
+for (k in 1:nrow(data)) {
+ #D[k] = sqrt(sum((1:H)*(dataj[L] -data[k,L])^2))
+ D[k] = sqrt(sum((dataj[L] -data[k,L])^2))
+}
+ind = order(D)[1:nbvois]
+w = 1/(D[ind]^2)
+w = w/sum(w)
+W = w%o%rep(1,48)
+JourMoy = apply(data[ind, 1:48], 2, mean)
+#JourMoy = apply(W*data[ind, 1:48], 2, sum)
+NomFile = paste("Voisins_Epandage_PMjour_Hc_",Hc,".png",sep="")
+Titre = paste("Jour à prévoir : ",dateJPrev," - ", length(ind)," voisins",sep="")
+#erreur = sqrt(sum((dataj[25:48] - JourMoy[25:48])^2))
+if(Hc==24){erreurPrev=NA}else{erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48]))}
+erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48]))
+png(NomFile)
+matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind),
+ cex.axis=1.4, cex.main = 1.7, cex.lab=1.5,
+ xlab="Heures locales", ylab=paste0("PM10 - Erreurs = ",round(erreur24,1)," / ",round(erreurPrev,1)), main=Titre)
+legend("top",rownames(data)[ind],ncol=2,lwd=1.4, lty=1, col=1:length(ind))
+lines(1:48, dataj, lwd=2.5)
+lines(JourMoy, lty = 2, lwd=2)
+abline(v=c(24.5,H+0.5), lty = 2, lwd=1.2)
+xx=dev.off()
+
+
+
+ Err24 = c(Err24, erreur24)
+ ErrPrev = c(ErrPrev, erreurPrev)
+ResDates = cbind(ResDates, rownames(data)[ind])
+}
+
+
+rownames(ResDates) = 1:10
+
+Kvois = NULL
+for (Col in ncol(ResDates):1) {
+K = 0
+for (I in 1:10){
+ for (J in 1:10){
+ if (ResDates[I,1] == ResDates[J,Col]) K = K +1
+ }
+}
+Kvois = c(Kvois, K)
+}
+
+# pdf("Erreur_Epandage_PMjour.pdf")
+ymin = min(na.omit(ErrPrev), Err24)
+ymin = min(ymin, Kvois)
+ymax = max(na.omit(ErrPrev), Err24)
+ymax = max(ymax, Kvois)
+plot(5:24,Err24, type = "l", lwd = 2, cex.axis=1.4, cex.lab = 1.5,
+ ylab = "MAE",xlab = "Heures de prévision", ylim= c(ymin,ymax))
+lines(5:24,ErrPrev, lwd=2, lty = 2)
+legend("topright", legend=c("Erreur 24h", "Erreur prévision"), lty = c(1,2), lwd=2, cex=1.5)
+points(5:24, Kvois, cex=1.8, pch = 19)
+# xx = dev.off()
+
+length(D)
#'
#' Data encapsulation as a list (days) of lists (components).
#'
-#' The private field .data is a list where each cell contains the variables for a period
-#' of time of 24 hours, from time P+1 until P the next day where P is an integer between
-#' 0 and 23. .data[[1]] refers to the first measured data (serie and exogenous
-#' variables): the corresponding times can be accessed through the function
-#' \code{getTime()} below. Each cell .data[[i]] is itself a list containing five slots,
-#' as described in the 'field' section.
+#' The private field .tv is a list where each cell contains the hourly variables for a
+#' period of time of 24 hours, from 1am to next midnight. The other lists contain
+#' informations on series' levels and exogenous variables (both measured and predicted).
#'
#' @usage # Data$new()
#'
-#' @field .data[[i]] List of
+#' @field .tv List of "time-values"; in each cell:
#' \itemize{
#' \item time: vector of times
-#' \item centered_serie: centered serie
-#' \item level: corresponding level
-#' \item exo: exogenous variables
-#' \item exo_hat: predicted exogenous variables (for next day)
+#' \item serie: (measured) serie
#' }
+#' @field .level Vector of measured levels
+#' @field .level_hat Vector of predicted levels
+#' @field .exo List of measured exogenous variables, cell = numerical vector.
+#' @field .exo_hat List of predicted exogenous variables, cell = numerical vector.
#'
#' @section Methods:
#' \describe{
#' \item{\code{getSize()}}{
#' Number of series in dataset.}
-#' \item{\code{getStdHorizon()}}{
-#' Number of time steps from serie[1] until midnight}
-#' \item{\code{append(time, serie, exo, exo_hat)}}{
+#' \item{\code{append(time, value, level_hat, exo, exo_hat)}}{
#' Measured data for given vector of times + exogenous predictions from
#' last midgnight.}
#' \item{\code{getTime(index)}}{
#' Times (vector) at specified index.}
-#' \item{\code{getCenteredSerie(index)}}{
-#' Centered serie at specified index.}
-#' \item{\code{getCenteredSeries(indices)}}{
-#' Centered series at specified indices (in columns).}
-#' \item{\code{getLevel(index)}}{
-#' Level at specified index.}
#' \item{\code{getSerie(index)}}{
#' Serie (centered+level) at specified index.}
#' \item{\code{getSeries(indices)}}{
#' Series at specified indices (in columns).}
-#' \item{\code{getExoHat(index)}}{
-#' Predicted exogenous variables at specified index.}
+#' \item{\code{getLevel(index)}}{
+#' Measured level at specified index.}
+#' \item{\code{getLevelHat(index)}}{
+#' Predicted level at specified index.}
+#' \item{\code{getCenteredSerie(index)}}{
+#' Centered serie at specified index.}
+#' \item{\code{getCenteredSeries(indices)}}{
+#' Centered series at specified indices (in columns).}
#' \item{\code{getExo(index)}}{
#' Measured exogenous variables at specified index.}
-#' \item{\code{removeFirst()}}{
-#' Remove first list element (if truncated).}
-#' \item{\code{removeLast()}}{
-#' Remove last list element (if truncated).}
+#' \item{\code{getExoHat(index)}}{
+#' Predicted exogenous variables at specified index.}
#' }
#'
#' @docType class
#'
Data = R6::R6Class("Data",
private = list(
- .data = list()
+ .tv = list(),
+ .level = vector("double",0),
+ .level_hat = vector("double",0),
+ .exo = list(),
+ .exo_hat = list()
),
public = list(
getSize = function()
- length(private$.data)
- ,
- getStdHorizon = function()
- 24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1
+ length(private$.tv)
,
- append = function(time, serie, exo, exo_hat)
+ append = function(time=NULL, value=NULL, level_hat=NULL, exo=NULL, exo_hat=NULL)
{
- level = mean(serie, na.rm=TRUE)
- centered_serie = serie - level
- private$.data[[length(private$.data)+1]] <- list(
- "time"=time, #H-24 --> H-1
- "centered_serie"=centered_serie, #at 'time'
- "level"=level, #at 'time'
- "exo"=exo, #at 'time' (yersteday 0am to last midnight)
- "exo_hat"=exo_hat) #today 0am to next midnight
+ if (!is.null(time) && !is.null(value))
+ {
+ L = length(private$.tv)
+ if (L == 0 || strftime( tail(private$.tv[[L]]$time,1),
+ format="%H:%M:%S", tz="GMT" ) == "00:00:00")
+ {
+ # Append a new cell
+ private$.tv[[L+1]] <- list("time"=time, "serie"=value)
+ }
+ else
+ {
+ # Complete current cell
+ private$.tv[[L]]$time = c(private$.tv[[L]]$time, time)
+ private$.tv[[L]]$serie = c(private$.tv[[L]]$serie, value)
+ }
+ }
+ if (strftime( tail(private$.tv[[length(private$.tv)]]$time,1),
+ format="%H:%M:%S", tz="GMT" ) == "00:00:00")
+ {
+ private$.level = c(private$.level,
+ mean(private$.tv[[length(private$.tv)]]$serie, na.rm=TRUE))
+ }
+ if (!is.null(level_hat))
+ private$.level_hat = c(private$.level_hat, level_hat)
+ if (!is.null(exo))
+ private$.exo[[length(private$.exo)+1]] = exo
+ if (!is.null(exo_hat))
+ private$.exo_hat[[length(private$.exo_hat)+1]] = exo_hat
},
getTime = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$time
+ private$.tv[[index]]$time
},
- getCenteredSerie = function(index)
+ getSerie = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$centered_serie
+ private$.tv[[index]]$serie
},
- getCenteredSeries = function(indices)
- sapply(indices, function(i) self$getCenteredSerie(i))
+ getSeries = function(indices)
+ sapply(indices, function(i) self$getSerie(i))
,
getLevel = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$level
+ private$.level[index]
},
- getSerie = function(index)
+ getLevelHat = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$centered_serie + private$.data[[index]]$level
+ private$.level_hat[index]
},
- getSeries = function(indices)
- sapply(indices, function(i) self$getSerie(i))
- ,
- getExoHat = function(index)
+ getCenteredSerie = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$exo_hat
+ private$.tv[[index]]$serie - private$.level[index]
},
+ getCenteredSeries = function(indices)
+ sapply(indices, function(i) self$getCenteredSerie(i))
+ ,
getExo = function(index)
{
index = dateIndexToInteger(index, self)
- private$.data[[index]]$exo
+ private$.exo[[index]]
},
- removeFirst = function()
- private$.data <- private$.data[2:length(private$.data)]
- ,
- removeLast = function()
- private$.data <- private$.data[1:(length(private$.data)-1)]
+ getExoHat = function(index)
+ {
+ index = dateIndexToInteger(index, self)
+ private$.exo_hat[[index]]
+ }
)
)
inherit = Forecaster,
public = list(
- predictShape = function(data, today, memory, horizon, ...)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
{
- avg = rep(0., horizon)
+ avg = rep(0., (horizon-predict_from+1))
first_day = max(1, today-memory)
- index = today-7 + 1
+ index <- today
nb_no_na_series = 0
repeat
{
- {
- serie_on_horizon = data$getCenteredSerie(index)[1:horizon]
- index = index - 7
- };
+ index = index - 7
+ if (index < first_day)
+ break
+ serie_on_horizon = data$getCenteredSerie(index)[predict_from:horizon]
if (!any(is.na(serie_on_horizon)))
{
avg = avg + serie_on_horizon
nb_no_na_series = nb_no_na_series + 1
- };
- if (index < first_day)
- break
+ }
}
avg / nb_no_na_series
}
#' where days in the past are chosen and weighted according to some similarity measures.
#'
#' The main method is \code{predictShape()}, taking arguments data, today, memory,
-#' horizon respectively for the dataset (object output of \code{getData()}), the current
-#' index, the data depth (in days) and the number of time steps to forecast.
+#' predict_from, horizon respectively for the dataset (object output of
+#' \code{getData()}), the current index, the data depth (in days), the first predicted
+#' hour and the last predicted hour.
#' In addition, optional arguments can be passed:
#' \itemize{
#' \item local : TRUE (default) to constrain neighbors to be "same days within same
#' season"
#' \item simtype : 'endo' for a similarity based on the series only,<cr>
-#' 'exo' for a similaruty based on exogenous variables only,<cr>
+#' 'exo' for a similarity based on exogenous variables only,<cr>
#' 'mix' for the product of 'endo' and 'exo',<cr>
#' 'none' (default) to apply a simple average: no computed weights
#' \item window : A window for similarities computations; override cross-validation
inherit = Forecaster,
public = list(
- predictShape = function(data, today, memory, horizon, ...)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
{
# (re)initialize computed parameters
private$.params <- list("weights"=NA, "indices"=NA, "window"=NA)
# Do not forecast on days with NAs (TODO: softer condition...)
- if (any(is.na(data$getCenteredSerie(today))))
+ if (any(is.na(data$getSerie(today-1)))
+ || any(is.na(data$getSerie(today)[1:(predict_from-1)])))
+ {
return (NA)
+ }
# Determine indices of no-NAs days followed by no-NAs tomorrows
- fdays = .getNoNA2(data, max(today-memory,1), today-1)
+ fdays = .getNoNA2(data, max(today-memory,1), today-2)
# Get optional args
local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season?
if (hasArg("window"))
{
return ( private$.predictShapeAux(data,
- fdays, today, horizon, local, list(...)$window, simtype, TRUE) )
+ fdays, today, predict_from, horizon, local, list(...)$window, simtype, TRUE) )
}
# Indices of similar days for cross-validation; TODO: 20 = magic number
for (i in seq_along(cv_days))
{
# mix_strategy is never used here (simtype != "mix"), therefore left blank
- prediction = private$.predictShapeAux(data, fdays, cv_days[i], horizon, local,
- window, simtype, FALSE)
+ prediction = private$.predictShapeAux(data, fdays, cv_days[i], predict_from,
+ horizon, local, window, simtype, FALSE)
if (!is.na(prediction[1]))
{
nb_jours = nb_jours + 1
error = error +
- mean((data$getSerie(cv_days[i]+1)[1:horizon] - prediction)^2)
+ mean((data$getSerie(cv_days[i]+1)[predict_from:horizon] - prediction)^2)
}
}
return (error / nb_jours)
else #none: value doesn't matter
1
- return(private$.predictShapeAux(data, fdays, today, horizon, local,
- best_window, simtype, TRUE))
+ return( private$.predictShapeAux(data, fdays, today, predict_from, horizon, local,
+ best_window, simtype, TRUE) )
}
),
private = list(
# Precondition: "today" is full (no NAs)
- .predictShapeAux = function(data, fdays, today, horizon, local, window, simtype,
- final_call)
+ .predictShapeAux = function(data, fdays, today, predict_from, horizon, local, window,
+ simtype, final_call)
{
fdays_cut = fdays[ fdays < today ]
if (length(fdays_cut) <= 1)
private$.params$indices <- fdays
private$.params$window <- 1
}
- return ( data$getSerie(fdays[1])[1:horizon] )
+ return ( data$getSerie(fdays[1]+1)[predict_from:horizon] )
}
}
else
window_endo = ifelse(simtype=="mix", window[1], window)
# Distances from last observed day to days in the past
- serieToday = data$getSerie(today)
+ lastSerie = c( data$getSerie(today-1), data$getSerie(today)[1:(predict_from-1)] )
distances2 = sapply(fdays, function(i) {
- delta = serieToday - data$getSerie(i)
- mean(delta^2)
+ delta = lastSerie - c(data$getSerie(i),data$getSerie(i+1)[1:(predict_from-1)])
+ sqrt(mean(delta^2))
})
simils_endo <- .computeSimils(distances2, window_endo)
# Compute exogen similarities using given window
window_exo = ifelse(simtype=="mix", window[2], window)
- M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
- M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
+ M = matrix( ncol=1+length(fdays), nrow=1+length(data$getExo(1)) )
+ M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
for (i in seq_along(fdays))
- M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+ M[,i+1] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
- sigma = cov(M) #NOTE: robust covariance is way too slow
+ sigma = cov(t(M)) #NOTE: robust covariance is way too slow
# TODO: 10 == magic number; more robust way == det, or always ginv()
sigma_inv =
if (length(fdays) > 10)
# Distances from last observed day to days in the past
distances2 = sapply(seq_along(fdays), function(i) {
- delta = M[1,] - M[i+1,]
+ delta = M[,1] - M[,i+1]
delta %*% sigma_inv %*% delta
})
rep(1, length(fdays))
similarities = similarities / sum(similarities)
- prediction = rep(0, horizon)
+ prediction = rep(0, horizon-predict_from+1)
for (i in seq_along(fdays))
- prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+ {
+ prediction = prediction +
+ similarities[i] * data$getSerie(fdays[i]+1)[predict_from:horizon]
+ }
if (final_call)
{
#
.getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12)
{
- levelToday = data$getLevel(today)
- distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday))
- #TODO: 2, +3 : magic numbers
- dist_thresh = 2
+ levelToday = data$getLevelHat(today)
+ levelYersteday = data$getLevel(today-1)
+ distances = sapply(fdays, function(i) {
+ sqrt((data$getLevel(i)-levelYersteday)^2 + (data$getLevel(i+1)-levelToday)^2)
+ })
+ #TODO: 1, +1, +3 : magic numbers
+ dist_thresh = 1
min_neighbs = min(min_neighbs,length(fdays))
repeat
{
nb_neighbs = sum(same_pollution)
if (nb_neighbs >= min_neighbs) #will eventually happen
break
- dist_thresh = dist_thresh + 3
+ dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
}
fdays = fdays[same_pollution]
max_neighbs = 12
if (nb_neighbs > max_neighbs)
{
# Keep only max_neighbs closest neighbors
- fdays = fdays[
- sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ]
+ fdays = fdays[ order(distances[same_pollution])[1:max_neighbs] ]
}
fdays
}
inherit = Forecaster,
public = list(
- predictShape = function(data, today, memory, horizon, ...)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
{
# Return centered last (similar) day curve, avoiding NAs until memory is run
first_day = max(1, today-memory)
same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE)
- # If 'same_day', get the last known future of similar day: -7 + 1 == -6
- index = today - ifelse(same_day,6,0)
+ index <- today
repeat
{
- {
- last_serie = data$getCenteredSerie(index)[1:horizon]
- index = index - ifelse(same_day,7,1)
- };
- if (!any(is.na(last_serie)))
- return (last_serie);
+ # If 'same_day', get the last known future of similar day
+ index = index - ifelse(same_day,7,1)
if (index < first_day)
return (NA)
+ last_serie = data$getCenteredSerie(index)[predict_from:horizon]
+ if (!any(is.na(last_serie)))
+ return (last_serie)
}
}
)
inherit = Forecaster,
public = list(
- predictShape = function(data, today, memory, horizon, ...)
- rep(0, horizon)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
+ rep(0, (horizon-predict_from+1))
)
)
#' Forecast encapsulation as a list (days where prediction occur) of lists (components).
#'
#' The private field .pred is a list where each cell contains the predicted variables for
-#' a period of time of H<=24 hours, from hour P+1 until P+H, where P+1 is taken right
-#' after the end of the period designated by \code{getIndexInData()}. In other terms,
-#' \code{forecast$getForecast(i)} return forecasts for \code{data$getSerie(i+1)}.
-#' Each cell .pred[[i]] is itself a list containing three slots, as described in the
-#' 'field' section.
+#' a period of time of H<=24 hours, from hour P until P+H-1, where P == predict_from.
+#' \code{forecast$getForecast(i)} output forecasts for
+#' \code{data$getSerie(forecast$getIndexInData(i))}.
#'
#' @usage # Forecast$new(dates)
#'
private$.pjump <- pjump
invisible(self)
},
- predictSerie = function(data, today, memory, horizon, ...)
+ predictSerie = function(data, today, memory, predict_from, horizon, ...)
{
# Parameters (potentially) computed during shape prediction stage
- predicted_shape = self$predictShape(data, today, memory, horizon, ...)
- predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
- # Predicted shape is aligned it on the end of current day + jump
- predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
+ predicted_shape = self$predictShape(data,today,memory,predict_from,horizon,...)
+ predicted_delta = private$.pjump(data, today, memory, predict_from, horizon,
+ private$.params, ...)
+
+ # Predicted shape is aligned on the end of current day + jump
+ c( data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()],
+ predicted_shape - predicted_shape[1] + predicted_delta +
+ ifelse(predict_from>=2,
+ data$getSerie(today)[predict_from-1], tail(data$getSerie(today-1),1)) )
},
- predictShape = function(data, today, memory, horizon, ...)
+ predictShape = function(data, today, memory, predict_from, horizon, ...)
NULL #empty default implementation: to implement in inherited classes
,
getParameters = function()
#'
#' @aliases J_Neighbors
#'
-getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...)
+getNeighborsJumpPredict = function(data, today, memory, predict_from, horizon,
+ params, ...)
{
first_day = max(1, today-memory)
filter = (params$indices >= first_day)
weights = params$weights[filter]
gaps = sapply(indices, function(i) {
- head( data$getSerie(i+1),1 ) - tail( data$getSerie(i),1 )
+ if (predict_from >= 2)
+ data$getSerie(i+1)[predict_from] - data$getSerie(i+1)[predict_from-1]
+ else
+ head(data$getSerie(i+1),1) - tail(data$getSerie(i),1)
})
scal_product = weights * gaps
norm_fact = sum( weights[!is.na(scal_product)] )
#'
#' @aliases J_Persistence
#'
-getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...)
+getPersistenceJumpPredict = function(data, today, memory, predict_from,
+ horizon, params, ...)
{
#return gap between end of similar day curve and first day of tomorrow (in the past)
first_day = max(1, today-memory)
same_day = ifelse(hasArg("same_day"), list(...)$same_day, TRUE)
- index = today - ifelse(same_day,7,1)
+ index <- today
repeat
{
- {
- last_serie_end = tail( data$getSerie(index), 1)
- last_tomorrow_begin = head( data$getSerie(index+1), 1)
- index = index - ifelse(same_day,7,1)
- };
- if (!is.na(last_serie_end) && !is.na(last_tomorrow_begin))
- return (last_tomorrow_begin - last_serie_end);
+ # If 'same_day', get the last known future of similar day
+ index = index - ifelse(same_day,7,1)
if (index < first_day)
return (NA)
+ gap <-
+ if (predict_from >= 2)
+ data$getSerie(index)[predict_from] - data$getSerie(index)[predict_from-1]
+ else
+ head(data$getSerie(index),1) - tail(data$getSerie(index-1),1)
+ if (!is.na(gap))
+ return (gap)
}
}
#' step, averaged on the L forecasting days.
#'
#' @export
-computeError = function(data, pred, horizon=data$getStdHorizon())
+computeError = function(data, pred, predict_from, horizon=length(data$getSerie(1)))
{
L = pred$getSize()
mape_day = rep(0, horizon)
for (i in seq_len(L))
{
index = pred$getIndexInData(i)
- serie = data$getSerie(index+1)[1:horizon]
- forecast = pred$getForecast(i)[1:horizon]
+ serie = data$getSerie(index)[predict_from:horizon]
+ forecast = pred$getForecast(i)[predict_from:horizon]
if (!any(is.na(serie)) && !any(is.na(forecast)))
{
nb_no_NA_data = nb_no_NA_data + 1
#' #do_something_with_pred
#' }}
#' @export
-computeForecast = function(data, indices, forecaster, pjump,
- memory=Inf, horizon=data$getStdHorizon(), ncores=3, ...)
+computeForecast = function(data, indices, forecaster, pjump, predict_from,
+ memory=Inf, horizon=length(data$getSerie(1)), ncores=3, ...)
{
# (basic) Arguments sanity checks
+ predict_from = as.integer(predict_from)[1]
+ if (! predict_from %in% 1:length(data$getSerie(1)))
+ stop("predict_from in [1,24] (hours)")
horizon = as.integer(horizon)[1]
- if (horizon<=0 || horizon>length(data$getCenteredSerie(1)))
- stop("Horizon too short or too long")
+ if (horizon<=predict_from || horizon>length(data$getSerie(1)))
+ stop("Horizon in [predict_from+1,24] (hours)")
integer_indices = sapply(indices, function(i) dateIndexToInteger(i,data))
if (any(integer_indices<=0 | integer_indices>data$getSize()))
stop("Indices out of range")
p <- parallel::mclapply(seq_along(integer_indices), function(i) {
list(
"forecast" = forecaster$predictSerie(
- data, integer_indices[i], memory, horizon, ...),
+ data, integer_indices[i], memory, predict_from, horizon, ...),
"params"= forecaster$getParameters(),
"index" = integer_indices[i] )
}, mc.cores=ncores)
p <- lapply(seq_along(integer_indices), function(i) {
list(
"forecast" = forecaster$predictSerie(
- data, integer_indices[i], memory, horizon, ...),
+ data, integer_indices[i], memory, predict_from, horizon, ...),
"params"= forecaster$getParameters(),
"index" = integer_indices[i] )
})
#' @param exo_data Exogenous variables, as a data frame or a CSV file; first column is
#' dates, next block are measurements for the day, and final block are exogenous
#' forecasts (for the same day).
-#' @param input_tz Timezone in the input files ("GMT" or e.g. "Europe/Paris")
#' @param date_format How date/time are stored (e.g. year/month/day hour:minutes;
#' see ?strptime)
-#' @param working_tz Timezone to work with ("GMT" or e.g. "Europe/Paris")
-#' @param predict_at When does the prediction take place? Integer, in hours. Default: 0
#' @param limit Number of days to extract (default: Inf, for "all")
#'
#' @return An object of class Data
#' exo_data = read.csv(system.file("extdata","meteo_extra_noNAs.csv",package="talweg"))
#' data = getData(ts_data, exo_data, limit=120)
#' @export
-getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%M",
- working_tz="GMT", predict_at=0, limit=Inf)
+getData = function(ts_data, exo_data, date_format="%d/%m/%Y %H:%M", limit=Inf)
{
# Sanity checks (not full, but sufficient at this stage)
- if (!is.character(input_tz) || !is.character(working_tz))
- stop("Bad timezone (see ?timezone)")
- input_tz = input_tz[1]
- working_tz = working_tz[1]
if ( (!is.data.frame(ts_data) && !is.character(ts_data)) ||
(!is.data.frame(exo_data) && !is.character(exo_data)) )
stop("Bad time-series / exogenous input (data frame or CSV file)")
ts_data = ts_data[1]
if (is.character(exo_data))
exo_data = exo_data[1]
- predict_at = as.integer(predict_at)[1]
- if (predict_at<0 || predict_at>23)
- stop("Bad predict_at (0-23)")
if (!is.character(date_format))
stop("Bad date_format (character)")
date_format = date_format[1]
+ if (!is.numeric(limit) || limit < 0)
+ stop("limit: positive integer")
ts_df =
if (is.character(ts_data))
read.csv(ts_data)
else
ts_data
- # Convert to the desired timezone (usually "GMT" or "Europe/Paris")
- formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz)
- ts_df[,1] = format(
- as.POSIXct(formatted_dates_POSIXlt, tz=input_tz), tz=working_tz, usetz=TRUE)
+ # Convert to GMT (pretend it's GMT; no impact)
+ dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz="GMT")
+ ts_df[,1] = format(as.POSIXct(dates_POSIXlt, tz="GMT"), tz="GMT", usetz=TRUE)
exo_df =
if (is.character(exo_data))
line = line + 1
};
if (line >= nb_lines + 1
- || as.POSIXlt(ts_df[line-1,1],tz=working_tz)$hour == predict_at)
+ || as.POSIXlt(ts_df[line-1,1],tz="GMT")$hour == 0)
{
break
}
}
- exo = as.data.frame( exo_df[i,2:(1+nb_exos)] )
- exo_hat =
- if (i < nrow(exo_df))
- as.data.frame( exo_df[i+1,(1+nb_exos+1):(1+2*nb_exos)] )
- else
- NA #exogenous prediction for next day are useless on last day
- data$append(time, serie, exo, exo_hat)
+ # TODO: 2 modes, "operational" and "testing"; would need PM10 predictions
+ data$append(time=time, value=serie, level_hat=mean(serie,na.rm=TRUE),
+ exo=exo_df[i,2:(1+nb_exos)], exo_hat=exo_df[i,(1+nb_exos+1):(1+2*nb_exos)])
if (i >= limit)
break
i = i + 1
}
- if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2)))
- data$removeFirst()
- if (length(data$getCenteredSerie(data$getSize()))
- < length(data$getCenteredSerie(data$getSize()-1)))
- {
- data$removeLast()
- }
data
}
#' @export
plotPredReal <- function(data, pred, index)
{
- horizon = length(pred$getForecast(1))
- measure = data$getSerie( pred$getIndexInData(index)+1 )[1:horizon]
prediction = pred$getForecast(index)
+ measure = data$getSerie( pred$getIndexInData(index) )[length(prediction)]
yrange = range(measure, prediction)
par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5, lwd=3)
plot(measure, type="l", ylim=yrange, xlab="Time (hours)", ylab="PM10")
#' @param index Index in forecast (integer or date)
#' @param limit Number of neighbors to consider
#' @param plot Should the result be plotted?
+#' @param predict_from First prediction instant
#'
#' @return A list with
#' \itemize{
#' }
#'
#' @export
-computeFilaments <- function(data, pred, index, limit=60, plot=TRUE)
+computeFilaments <- function(data, pred, index, predict_from, limit=60, plot=TRUE)
{
- ref_serie = data$getCenteredSerie( pred$getIndexInData(index) )
- if (any(is.na(ref_serie)))
+ if (is.null(pred$getParams(index)$weights) || is.na(pred$getParams(index)$weights[1]))
stop("computeFilaments requires a serie without NAs")
# Compute colors for each neighbor (from darkest to lightest)
if (plot)
{
# Complete series with (past and present) tomorrows
- ref_serie = c(ref_serie, data$getCenteredSerie( pred$getIndexInData(index)+1 ))
+ ref_serie = c( data$getCenteredSerie( pred$getIndexInData(index)-1 ),
+ data$getCenteredSerie( pred$getIndexInData(index) ) )
centered_series = rbind(
data$getCenteredSeries( pred$getParams(index)$indices ),
data$getCenteredSeries( pred$getParams(index)$indices+1 ) )
}
# Also plot ref curve, in red
plot(ref_serie, ylim=yrange, type="l", col="#FF0000", xlab="", ylab="")
- abline(v=24, lty=2, col=colors()[56], lwd=1)
+ abline(v=24+predict_from-0.5, lty=2, col=colors()[56], lwd=1)
}
list(
#' @param fil Output of \code{computeFilaments}
#'
#' @export
-plotFilamentsBox = function(data, fil)
+plotFilamentsBox = function(data, fil, predict_from)
{
if (!requireNamespace("rainbow", quietly=TRUE))
stop("Functional boxplot requires the rainbow package")
par(new=TRUE)
plot(c(data$getSerie(fil$index),data$getSerie(fil$index+1)), type="l", lwd=2, lty=2,
ylim=c(usr[3] + yr, usr[4] - yr), xlab="", ylab="")
- abline(v=24, lty=2, col=colors()[56])
+ abline(v=24+predict_from-0.5, lty=2, col=colors()[56])
}
#' Plot relative conditional variability / absolute variability
#' @inheritParams plotFilamentsBox
#'
#' @export
-plotRelVar = function(data, fil)
+plotRelVar = function(data, fil, predict_from)
{
ref_var = c( apply(data$getSeries(fil$neighb_indices),1,sd),
apply(data$getSeries(fil$neighb_indices+1),1,sd) )
xlab="Time (hours)", ylab="Standard deviation")
par(new=TRUE)
plot(global_var, type="l", col=2, lwd=3, ylim=yrange, xlab="", ylab="")
- abline(v=24, lty=2, col=colors()[56])
+ abline(v=24+predict_from-0.5, lty=2, col=colors()[56])
}
.getNoNA2 = function(data, first, last)
{
(first:last)[ sapply(first:last, function(i)
- !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) )
+ !any( is.na(data$getSerie(i)) | is.na(data$getSerie(i+1)) )
) ]
}
time = as.POSIXct((i-1)*60*60*24+15*60*(1:96), origin="2007-01-01", tz="GMT")
exo = runif(4)
exo_hat = runif(4)
- data$append(time, serie, exo, exo_hat)
+ data$append(time=time, value=serie, exo=exo, exo_hat=exo_hat)
}
data
}
ts_data = system.file("testdata","ts_test.csv",package="talweg")
exo_data = system.file("testdata","exo_test.csv",package="talweg")
-data0 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
- working_tz="GMT", predict_at=0, limit=Inf)
-data7 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
- working_tz="GMT", predict_at=7, limit=Inf)
+data0 <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf)
+data7 <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf)
test_that("dateIndexToInteger works as expected",
{
ts_data = system.file("testdata","ts_test.csv",package="talweg")
exo_data = system.file("testdata","exo_test.csv",package="talweg")
-data00 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
- working_tz="GMT", predict_at=0, limit=Inf)
-data13 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M",
- working_tz="GMT", predict_at=13, limit=Inf)
-#Forecast at sunday to saturday (series 7 to 1), for monday to sunday (series 1 to 7)
-indices <<- seq(as.Date("2007-04-01"),as.Date("2007-04-07"),"days")
+data_p <<- getData(ts_data, exo_data, date_format="%Y-%m-%d %H:%M", limit=Inf)
+#Forecasts from monday to sunday (series 1 to 7)
+indices <<- seq(as.Date("2007-04-02"),as.Date("2007-04-08"),"days")
pred_order = c(7,1:6) #will facilitate tests
test_that("Average method behave as expected",
{
- pred00_z = computeForecast(data00, indices, "Average", "Zero", Inf, 24)
- pred00_p = computeForecast(data00, indices, "Average", "Persistence", Inf, 24)
+ pred00_z = computeForecast(data_p, indices, "Average", "Zero", 1, Inf, 24, ncores=1)
+ pred00_p = computeForecast(data_p, indices, "Average", "Persistence", 1, Inf, 24)
for (i in 1:7)
{
#zero jump: should predict true values minus 1
expect_equal( pred00_p$getForecast(i), rep(i,24) )
}
- #NOTE: days become
+ #NOTE: 24h-block become
#1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 (14h-->0h then 1h-->13h)
#No jump between days, thus zero and persistence are equivalent (and correct)
- pred13_z = computeForecast(data13, indices, "Average", "Zero", Inf, 24)
- pred13_p = computeForecast(data13, indices, "Average", "Persistence", Inf, 24)
+ pred13_z = computeForecast(data_p, indices, "Average", "Zero", 14, Inf, 24)
+ pred13_p = computeForecast(data_p, indices, "Average", "Persistence", 14, Inf, 24)
for (i in 1:7)
{
- expect_equal( pred13_z$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
- expect_equal( pred13_p$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
+ expect_equal( pred13_z$getForecast(i), rep(i,24) )
+ expect_equal( pred13_p$getForecast(i), rep(i,24) )
}
#A few extra checks
- expect_equal( pred00_p$getIndexInData(2), dateIndexToInteger("2007-04-02",data00) )
- expect_equal( pred00_z$getIndexInData(2), dateIndexToInteger("2007-04-02",data00) )
- expect_equal( pred13_p$getIndexInData(5), dateIndexToInteger("2007-04-05",data13) )
- expect_equal( pred13_z$getIndexInData(5), dateIndexToInteger("2007-04-05",data13) )
+ expect_equal( pred00_p$getIndexInData(2), dateIndexToInteger("2007-04-03",data_p) )
+ expect_equal( pred00_z$getIndexInData(2), dateIndexToInteger("2007-04-03",data_p) )
+ expect_equal( pred13_p$getIndexInData(5), dateIndexToInteger("2007-04-06",data_p) )
+ expect_equal( pred13_z$getIndexInData(5), dateIndexToInteger("2007-04-06",data_p) )
})
test_that("Persistence method behave as expected",
{
#Situation A: +Zero; (generally) correct if jump, wrong otherwise
- pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24,
+ pred00_sd = computeForecast(data_p, indices, "Persistence", "Zero", 1, Inf, 24,
ncores=1, same_day=TRUE)
- pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24,
+ pred00_dd = computeForecast(data_p, indices, "Persistence", "Zero", 1, Inf, 24,
ncores=1, same_day=FALSE)
for (i in 1:7)
{
expect_equal(pred00_dd$getForecast(i), rep(pred_order[i],24))
}
- pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24,
+ pred13_sd = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24,
ncores=1, same_day=TRUE)
- pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24,
+ pred13_dd = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24,
ncores=1, same_day=FALSE)
- for (i in 2:6)
+ for (i in 1:7)
{
- expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
- expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
+ expect_equal(pred13_sd$getForecast(i), rep(i,24) )
+ expect_equal(pred13_dd$getForecast(i), rep(i,24) )
}
- #boundaries are special cases: OK if same day, quite wrong otherwise
- expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) )
- expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) )
- expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) )
- expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) )
#Situation B: +Persistence, (generally) correct
- pred00_sd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24,
+ pred00_sd = computeForecast(data_p, indices, "Persistence", "Persistence", 1, Inf, 24,
ncores=1, same_day=TRUE)
- pred00_dd = computeForecast(data00, indices, "Persistence", "Persistence", Inf, 24,
+ pred00_dd = computeForecast(data_p, indices, "Persistence", "Persistence", 1, Inf, 24,
ncores=1, same_day=FALSE)
for (i in 3:7)
{
expect_equal(pred00_sd$getForecast(2), rep(2,24) )
expect_equal(pred00_dd$getForecast(2), rep(-5,24) )
- pred13_sd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24,
+ pred13_sd = computeForecast(data_p, indices, "Persistence", "Persistence", 14, Inf, 24,
ncores=1, same_day=TRUE)
- pred13_dd = computeForecast(data13, indices, "Persistence", "Persistence", Inf, 24,
+ pred13_dd = computeForecast(data_p, indices, "Persistence", "Persistence", 14, Inf, 24,
ncores=1, same_day=FALSE)
- for (i in 2:6)
+ for (i in 1:7)
{
- expect_equal(pred13_sd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
- expect_equal(pred13_dd$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
+ expect_equal(pred13_sd$getForecast(i), rep(i,24) )
+ expect_equal(pred13_dd$getForecast(i), rep(i,24) )
}
- #boundaries are special cases: OK if same day, quite wrong otherwise
- expect_equal(pred13_sd$getForecast(1), c( rep(1,11), rep(2,13) ) )
- expect_equal(pred13_dd$getForecast(1), c( rep(1,11), rep(-5,13) ) )
- expect_equal(pred13_sd$getForecast(7), c( rep(7,11), rep(1,13) ) )
- expect_equal(pred13_dd$getForecast(7), c( rep(7,11), rep(8,13) ) )
#A few extra checks
- expect_equal( pred00_sd$getIndexInData(3), dateIndexToInteger("2007-04-03",data00) )
- expect_equal( pred00_dd$getIndexInData(6), dateIndexToInteger("2007-04-06",data00) )
- expect_equal( pred13_sd$getIndexInData(3), dateIndexToInteger("2007-04-03",data13) )
- expect_equal( pred13_dd$getIndexInData(6), dateIndexToInteger("2007-04-06",data13) )
+ expect_equal( pred00_sd$getIndexInData(3), dateIndexToInteger("2007-04-04",data_p) )
+ expect_equal( pred00_dd$getIndexInData(6), dateIndexToInteger("2007-04-07",data_p) )
+ expect_equal( pred13_sd$getIndexInData(3), dateIndexToInteger("2007-04-04",data_p) )
+ expect_equal( pred13_dd$getIndexInData(6), dateIndexToInteger("2007-04-07",data_p) )
})
test_that("Neighbors method behave as expected",
{
#Situation A: +Zero; correct if jump, wrong otherwise
- pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24,
+ pred00 = computeForecast(data_p, indices, "Neighbors", "Zero", 1, Inf, 24,
simtype="mix", local=FALSE)
for (i in 1:7)
expect_equal(pred00$getForecast(i), rep(pred_order[i],24))
- pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24,
+ pred13 = computeForecast(data_p, indices, "Persistence", "Zero", 14, Inf, 24,
simtype="mix", local=FALSE)
for (i in 1:7)
- expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
+ expect_equal(pred13$getForecast(i), rep(i,24) )
#Situation B: +Neighbors == too difficult to eval in a unit test
-# pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24,
+# pred00 = computeForecast(data_p, indices, "Neighbors", "Neighbors", 1, Inf, 24,
# simtype="endo", local=FALSE)
# jumps = ...
# for (i in 1:7)
# expect_equal(pred00$getForecast(i), rep(pred_order[i]+jumps[i],24))
-# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24,
+# pred13 = computeForecast(data_p, indices, "Neighbors", "Neighbors", 14, Inf, 24,
# simtype="endo", local=FALSE)
# for (i in 1:7)
# expect_equal(pred13$getForecast(i), c( rep(i,11), rep(i%%7+1,13) ) )
#A few extra checks
- expect_equal( pred00$getIndexInData(1), dateIndexToInteger("2007-04-01",data00) )
- expect_equal( pred00$getIndexInData(4), dateIndexToInteger("2007-04-04",data00) )
- expect_equal( pred13$getIndexInData(1), dateIndexToInteger("2007-04-01",data13) )
- expect_equal( pred13$getIndexInData(4), dateIndexToInteger("2007-04-04",data13) )
+ expect_equal( pred00$getIndexInData(1), dateIndexToInteger("2007-04-02",data_p) )
+ expect_equal( pred00$getIndexInData(4), dateIndexToInteger("2007-04-05",data_p) )
+ expect_equal( pred13$getIndexInData(1), dateIndexToInteger("2007-04-02",data_p) )
+ expect_equal( pred13$getIndexInData(4), dateIndexToInteger("2007-04-05",data_p) )
})
{
data = getDataTest(150)
- # index 143 : serie type 2
- pred = computeForecast(data, 143, "Neighbors", "Zero",
+ # index 144-1 == 143 : serie type 2
+ pred = computeForecast(data, 144, "Neighbors", "Zero", predict_from=8,
horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1)
- f = computeFilaments(data, pred, 1, limit=60, plot=FALSE)
+ f = computeFilaments(data, pred, 1, 8, limit=60, plot=FALSE)
# Expected output: 50-3-10 series of type 2, then 23 series of type 3 (closest next)
expect_identical(length(f$neighb_indices), as.integer(60))
expect_identical(length(f$colors), as.integer(60))
- expect_equal(f$index, 143)
+ expect_equal(f$index, 144)
expect_true(all(I(f$neighb_indices) >= 2))
for (i in 1:37)
{
expect_match(f$colors[1], "#1*")
expect_match(f$colors[38], "#E*")
- # index 142 : serie type 1
- pred = computeForecast(data, 142, "Neighbors", "Zero",
+ # index 143-1 == 142 : serie type 1
+ pred = computeForecast(data, 143, "Neighbors", "Zero", predict_from=8,
horizon=length(data$getSerie(1)), simtype="endo", local=FALSE, h_window=1)
- f = computeFilaments(data, pred, 1, limit=50, plot=FALSE)
+ f = computeFilaments(data, pred, 1, 8, limit=50, plot=FALSE)
# Expected output: 50-10-3 series of type 1, then 13 series of type 3 (closest next)
# NOTE: -10 because only past days with no-NAs tomorrow => exclude type 1 in [60,90[
expect_identical(length(f$neighb_indices), as.integer(50))
expect_identical(length(f$colors), as.integer(50))
- expect_equal(f$index, 142)
+ expect_equal(f$index, 143)
expect_true(all(I(f$neighb_indices) != 2))
for (i in 1:37)
{
package="talweg"))
exo_data <- read.csv(system.file("extdata","meteo_extra_noNAs.csv",
package="talweg"))
-data <- getData(ts_data, exo_data, input_tz="GMT",
- date_format="%d/%m/%Y %H:%M", working_tz="GMT",
- predict_at=7, limit=120)
+data <- getData(ts_data, exo_data,
+ date_format="%d/%m/%Y %H:%M", limit=120)
# Plus de détails à la section 1 ci-après.
# Prédiction de 10 courbes (jours 102 à 111)
-pred <- computeForecast(data, 101:110, "Persistence", "Zero", memory=50,
- horizon=12, ncores=1)
+pred <- computeForecast(data, 101:110, "Persistence", "Zero",
+ predict_from=8, memory=50, horizon=24, ncores=1)
# Plus de détails à la section 2 ci-après.
# Calcul des erreurs (sur un horizon arbitraire <= horizon de prédiction)
-err <- computeError(data, pred, horizon=6)
+err <- computeError(data, pred, predict_from=8, horizon=20)
# Plus de détails à la section 3 ci-après.
# Puis voir ?plotError et les autres plot dans le paragraphe 'seealso'
première colonne contient les jours, les $m$ suivantes les variables mesurées pour ce
jour, et les $m$ dernières les variables prédites pour ce même jour. Dans notre cas $m=4$
: pression, température, gradient de température, vitesse du vent.
- 3. **input_tz** : zone horaire pour ts_data (défaut : "GMT").
- 4. **date_format** : format des heures dans ts_data (défaut : "%d/%m/%Y %H:%M", format
-du fichier transmis par Michel).
- 5. **working_tz** : zone horaire dans laquelle on souhaite travailler avec les données
-(défaut : "GMT").
- 6. **predict_at** : heure à laquelle s'effectue la prévision $-$ et donc dernière heure
-d'un bloc de 24h, relativement à working_tz. data`$`getSerie(3) renvoit ainsi les 24
-valeurs de 8h à 7h pour le $3^{eme}$ bloc de 24h présent dans le jeu de données.
+ 3. **date_format** : format des heures dans ts_data (défaut : "%d/%m/%Y %H:%M", format
+du fichier transmis par Michel Bobbia).
+ 4. **limit** : nombre de séries à récupérer.
-----r
print(data)
#?Data
?computeForecast
5. **memory** : le nombre de jours à prendre en compte dans le passé pour chaque
prévision (par défaut : Inf, c'est-à-dire tout l'historique pris en compte).
- 6. **horizon** : le nombre d'heures à prédire ; par défaut "data`$`getStdHorizon()",
-c'est-à-dire le nombre d'heures restantes à partir de l'instant de prévision + 1 jusqu'à
-minuit (17 pour predict_at=7 par exemple).
- 7. **ncores** : le nombre de processus parallèles (utiliser 1 pour une exécution
+ 6. **predict_from** : première heure de prévision. Les séries prévues démarrent
+cependant toutes à 1h du matin (en reprenant les premières valeurs connues).
+ 7. **horizon** : dernière heure de prévision ; maximum 24 == minuit (valeur par défaut).
+pred`$`getForecast(i) retourne une journée complète de 01:00 à 00:00 si horizon=24.
+ 8. **ncores** : le nombre de processus parallèles (utiliser 1 pour une exécution
séquentielle)
-----r
print(pred)
1. **data** : le jeu de données renvoyé par getData()
2. **pred** : les prédictions renvoyées par computeForecast()
- 3. **horizon** : le nombre d'heures à considérer pour le calcul de l'erreur ; doit être
-inférieur ou égal à l'horizon utilisé pour la prédiction (même valeur par défaut :
-"data`$`getStdHorizon()")
+ 3. **predict_from** : première heure de prévision ; peut être différente de l'analogue
+dans l'appel à *computeForecast()*.
+ 4. **horizon** : dernière heure de prévision à considérer pour le calcul de l'erreur ;
+inférieure ou égale à la valeur de l'argument analogue dans *computeForecast()*
-----r
summary(err)
summary(err$abs)