intermediate: R6, too slow
authorBenjamin Auder <benjamin.auder@somewhere>
Mon, 27 Feb 2017 12:52:01 +0000 (13:52 +0100)
committerBenjamin Auder <benjamin.auder@somewhere>
Mon, 27 Feb 2017 12:52:01 +0000 (13:52 +0100)
14 files changed:
data/scripts/repair_HACK.R [new file with mode: 0644]
pkg/DESCRIPTION
pkg/R/Data.R
pkg/R/F_Average.R
pkg/R/F_Neighbors.R
pkg/R/F_Persistence.R
pkg/R/F_Zero.R
pkg/R/Forecast.R
pkg/R/Forecaster.R
pkg/R/computeForecast.R
pkg/R/getData.R
pkg/R/plot.R
pkg/R/utils.R
reports/report_2017-03-01.7h_average.ipynb

diff --git a/data/scripts/repair_HACK.R b/data/scripts/repair_HACK.R
new file mode 100644 (file)
index 0000000..69ae774
--- /dev/null
@@ -0,0 +1,27 @@
+                       # HACK for test reports: complete some days with a few NAs, for nicer graphics
+                       nas_in_serie = is.na(data$getSerie(today))
+                       if (any(nas_in_serie))
+                       {
+                               if (sum(nas_in_serie) >= length(nas_in_serie) / 2)
+                                       return (NA)
+                               for (i in seq_along(nas_in_serie))
+                               {
+                                       if (nas_in_serie[i])
+                                       {
+                                               #look left
+                                               left = i-1
+                                               while (left>=1 && nas_in_serie[left])
+                                                       left = left-1
+                                               #look right
+                                               right = i+1
+                                               while (right<=length(nas_in_serie) && nas_in_serie[right])
+                                                       right = right+1
+                                               #HACK: modify by-reference Data object...
+                                               data$data[[today]]$serie[i] <<-
+                                                       if (left==0) data$data[[today]]$serie[right]
+                                                       else if (right==0) data$data[[today]]$serie[left]
+                                                       else (data$data[[today]]$serie[left] + data$data[[today]]$serie[right]) / 2.
+                                       }
+                               }
+                       }
+
index 38b442e..e83e31e 100644 (file)
@@ -10,7 +10,8 @@ Authors: Benjamin Auder <Benjamin.Auder@math.u-psud.fr> [aut,cre],
     Bruno Portier <Bruno.Portier@insa-rouen.fr>, [ctb]
 Maintainer: Benjamin Auder <Benjamin.Auder@math.u-psud.fr>
 Depends:
-    R (>= 3.0)
+    R (>= 3.0),
+               R6
 Suggests:
     roxygen2,
     testthat,
@@ -32,9 +33,9 @@ Collate:
     'J_Neighbors.R'
     'J_Persistence.R'
     'J_Zero.R'
-    'getData.R'
     'computeError.R'
     'computeForecast.R'
+    'getData.R'
     'plot.R'
     'utils.R'
 VignetteBuilder: knitr
index 27aa082..d970f7b 100644 (file)
 #' @importFrom R6 R6Class
 #'
 #' @export
-Data = R6Class("Data",
+Data = R6::R6Class("Data",
        private = list(
-               .data = "list"
+               .data = list()
        ),
        public = list(
-               initialize = function(...)
-                       initialize(self, private, ...)
-               ,
                getSize = function()
-                       getSize(private)
+                       getSizeData(private)
                ,
                getStdHorizon = function()
-                       getStdHorizon(private)
+                       getStdHorizonData(private)
                ,
                append = function(new_time, new_centered_serie, new_level, new_exo, new_exo_hat)
-                       append(private, new_time, new_centered_serie, new_level, new_exo, new_exo_hat)
+                       appendData(private, new_time, new_centered_serie, new_level, new_exo, new_exo_hat)
                ,
                getTime = function(index)
-                       getTime(self, private, index)
+                       getTimeData(self, private, index)
                ,
                getCenteredSerie = function(index)
-                       getCenteredSerie(self, private, index)
+                       getCenteredSerieData(self, private, index)
                ,
                getLevel = function(index)
-                       getLevel(self, private, index)
+                       getLevelData(self, private, index)
                ,
                getSerie = function(index)
-                       getSerie(self, private, index)
+                       getSerieData(self, private, index)
                ,
                getExo = function(index)
-                       getExo(self, private, index)
+                       getExoData(self, private, index)
                ,
                getExoHat = function(index)
-                       getExoHat(self, private, index)
+                       getExoHatData(self, private, index)
        )
 )
 
-##TODO: @param... @inheritParams...
-
-#' Initialize empty Data object
-initialize = function(o, private, ...)
-{
-       private$.data <<- if (hasArg("data")) list(...)$data else list()
-       invisible(o)
-}
-
 #' Number of series in the dataset
-getSize = function(private)
+#'
+#' @param private List of private members in current object
+getSizeData = function(private)
        length(private$.data)
 
 #' 'Standard' horizon, from t+1 to midnight
-getStdHorizon = function(private)
+#'
+#' @inheritParams getSizeData
+getStdHorizonData = function(private)
        24 - as.POSIXlt( private$.data[[1]]$time[1] )$hour + 1
 
 #' Acquire a new vector of lists (time, centered_serie, level, exo, exo_hat)
-append = function(private, new_time, new_centered_serie, new_level, new_exo_hat, new_exo)
+#'
+#' @inheritParams getSizeData
+#' @param new_time Time
+#' @param new_centered_serie Centered serie
+#' @param new_level Level
+#' @param new_exo Exogneous variables
+#' @param new_exo_hat Predicted exogenous variables
+appendData = function(private, new_time, new_centered_serie, new_level, new_exo, new_exo_hat)
 {
-       private$.data[[length(private$.data)+1]] <<- list("time"=new_time,
+       private$.data[[length(private$.data)+1]] <- list("time"=new_time,
                "centered_serie"=new_centered_serie,"level"=new_level,"exo"=new_exo,"exo_hat"=new_exo_hat)
 }
 
 #' Time values at specified index
-getTime = function(o, private, index)
+#'
+#' @inheritParams getSizeData
+#' @param index Return value at this index
+getTimeData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
        private$.data[[index]]$time
 }
 
 #' Centered serie values at specified index
-getCenteredSerie = function(o, private, index)
+#'
+#' @inheritParams getTimeData
+getCenteredSerieData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
-       private$.data[[index]]$serie
+       private$.data[[index]]$centered_serie
 }
 
 #' Level of the serie at specified index
-getCenteredSerie = function(o, private, index)
+#'
+#' @inheritParams getTimeData
+getLevelData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
        private$.data[[index]]$level
 }
 
 #' Serie values (centered+level) at specified index
-getCenteredSerie = function(o, private, index)
+#'
+#' @inheritParams getTimeData
+getSerieData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
-       private$.data[[index]]$serie + data[[index]]$level
+       private$.data[[index]]$centered_serie + data[[index]]$level
 }
 
 #' Exogenous measures at specified index
-getCenteredSerie = function(o, private, index)
+#'
+#' @inheritParams getTimeData
+getExoData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
        private$.data[[index]]$exo
 }
 
 #' Exogeous predictions at specified index
-getCenteredSerie = function(o, private, index)
+#'
+#' @inheritParams getTimeData
+getExoHatData = function(o, private, index)
 {
        index = dateIndexToInteger(index, o)
        private$.data[[index]]$exo_hat
index ba0f003..abc88be 100644 (file)
@@ -1,34 +1,13 @@
 #' @include Forecaster.R
 #'
-#' @title Average Forecaster
+#' Average Forecaster
 #'
-#' @description Return the (pointwise) average of the all the (similar) centered day curves
-#'   in the past. Inherits \code{\link{Forecaster}}
-AverageForecaster = setRefClass(
-       Class = "AverageForecaster",
-       contains = "Forecaster",
+#' Return the (pointwise) average of the all the (similar) centered day curves
+#' in the past. Inherits \code{\link{Forecaster}}
+AverageForecaster = R6::R6Class("AverageForecaster",
+       inherit = "Forecaster",
 
-       methods = list(
-               initialize = function(...)
-               {
-                       callSuper(...)
-               },
-               predict = function(today, memory, horizon, ...)
-               {
-                       predicted_shape = predictShape(today, memory, horizon, ...)
-                       #Take care of never passing same_day==FALSE (when pjump == Persistence)
-                       predicted_delta =
-                               if (#as.character(substitute(pjump))=="Persistence" && #TODO: doesn't work
-                                       hasArg("same_day") && list(...)$same_day==FALSE)
-                               {
-                                       args = list(...)
-                                       args$same_day = TRUE
-                                       do.call(pjump, append(list("today"=today,"memory"=memory,"horizon"=horizon), args))
-                               }
-                               else
-                                       pjump(data, today, memory, horizon, params, ...)
-                       predicted_shape + tail(data$getSerie(today),1) - predicted_shape[1] + predicted_delta
-               },
+       public = list(
                predictShape = function(today, memory, horizon, ...)
                {
                        avg = rep(0., horizon)
index 43a6a13..ac0df04 100644 (file)
@@ -1,18 +1,13 @@
 #' @include Forecaster.R
 #'
-#' @title Neighbors Forecaster
+#' Neighbors Forecaster
 #'
-#' @description Predict tomorrow as a weighted combination of "futures of the past" days.
-#'   Inherits \code{\link{Forecaster}}
-NeighborsForecaster = setRefClass(
-       Class = "NeighborsForecaster",
-       contains = "Forecaster",
-
-       methods = list(
-               initialize = function(...)
-               {
-                       callSuper(...)
-               },
+#' Predict tomorrow as a weighted combination of "futures of the past" days.
+#' Inherits \code{\link{Forecaster}}
+NeighborsForecaster = R6::R6Class("NeighborsForecaster",
+       inherit = "Forecaster",
+
+       public = list(
                predictShape = function(today, memory, horizon, ...)
                {
                        # (re)initialize computed parameters
@@ -24,33 +19,6 @@ NeighborsForecaster = setRefClass(
                        if (hasArg(h_window))
                                return (.predictShapeAux(fdays,today,horizon,list(...)$h_window,kernel,simtype,TRUE))
 
-                       # HACK for test reports: complete some days with a few NAs, for nicer graphics
-                       nas_in_serie = is.na(data$getSerie(today))
-                       if (any(nas_in_serie))
-                       {
-                               if (sum(nas_in_serie) >= length(nas_in_serie) / 2)
-                                       return (NA)
-                               for (i in seq_along(nas_in_serie))
-                               {
-                                       if (nas_in_serie[i])
-                                       {
-                                               #look left
-                                               left = i-1
-                                               while (left>=1 && nas_in_serie[left])
-                                                       left = left-1
-                                               #look right
-                                               right = i+1
-                                               while (right<=length(nas_in_serie) && nas_in_serie[right])
-                                                       right = right+1
-                                               #HACK: modify by-reference Data object...
-                                               data$data[[today]]$serie[i] <<-
-                                                       if (left==0) data$data[[today]]$serie[right]
-                                                       else if (right==0) data$data[[today]]$serie[left]
-                                                       else (data$data[[today]]$serie[left] + data$data[[today]]$serie[right]) / 2.
-                                       }
-                               }
-                       }
-
                        # Determine indices of no-NAs days followed by no-NAs tomorrows
                        first_day = max(today - memory, 1)
                        fdays = (first_day:(today-1))[ sapply(first_day:(today-1), function(i) {
@@ -92,12 +60,12 @@ NeighborsForecaster = setRefClass(
                                h_best_mix = c(h_best_endo,h_best_exo)
                                return (.predictShapeAux(fdays, today, horizon, h_best_mix,  kernel, "mix",  TRUE))
                        }
-               },
+               }
+       ),
+       private = list(
                # Precondition: "today" is full (no NAs)
                .predictShapeAux = function(fdays, today, horizon, h, kernel, simtype, final_call)
                {
-                       dat = data$data #HACK: faster this way...
-
                        fdays = fdays[ fdays < today ]
                        # TODO: 3 = magic number
                        if (length(fdays) < 3)
@@ -111,7 +79,7 @@ NeighborsForecaster = setRefClass(
                                distances2 = rep(NA, length(fdays))
                                for (i in seq_along(fdays))
                                {
-                                       delta = dat[[today]]$serie - dat[[ fdays[i] ]]$serie
+                                       delta = data$getCenteredSerie(today) - data$getCenteredSerie(fdays[i])
                                        # Require at least half of non-NA common values to compute the distance
                                        if (sum(is.na(delta)) <= 0) #length(delta)/2)
                                                distances2[i] = mean(delta^2) #, na.rm=TRUE)
@@ -134,10 +102,10 @@ NeighborsForecaster = setRefClass(
                        {
                                h_exo = ifelse(simtype=="mix", h[2], h)
 
-                               M = matrix( nrow=1+length(fdays), ncol=1+length(dat[[today]]$exo) )
-                               M[1,] = c( dat[[today]]$level, as.double(dat[[today]]$exo) )
+                               M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
+                               M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
                                for (i in seq_along(fdays))
-                                       M[i+1,] = c( dat[[ fdays[i] ]]$level, as.double(dat[[ fdays[i] ]]$exo) )
+                                       M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
 
                                sigma = cov(M) #NOTE: robust covariance is way too slow
                                sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
@@ -171,7 +139,7 @@ NeighborsForecaster = setRefClass(
 
                        prediction = rep(0, horizon)
                        for (i in seq_along(fdays_indices))
-                               prediction = prediction + similarities[i] * dat[[ fdays_indices[i]+1 ]]$serie[1:horizon]
+                               prediction = prediction + similarities[i] * data$getSerie(fdays_indices[i]+1)[1:horizon]
                        prediction = prediction / sum(similarities, na.rm=TRUE)
 
                        if (final_call)
index 19cf2e4..39e043c 100644 (file)
@@ -1,18 +1,13 @@
 #' @include Forecaster.R
 #'
-#' @title Persistence Forecaster
+#' Persistence Forecaster
 #'
-#' @description Return the last centered (similar) day curve.
-#'   Inherits \code{\link{Forecaster}}
-PersistenceForecaster = setRefClass(
-       Class = "PersistenceForecaster",
-       contains = "Forecaster",
+#' Return the last centered (similar) day curve.
+#' Inherits \code{\link{Forecaster}}
+PersistenceForecaster = R6::R6Class("PersistenceForecaster",
+       inherit = "Forecaster",
 
-       methods = list(
-               initialize = function(...)
-               {
-                       callSuper(...)
-               },
+       public = list(
                predictShape = function(today, memory, horizon, ...)
                {
                        # Return centered last (similar) day curve, avoiding NAs until memory is run
index 247ab04..ff6eb52 100644 (file)
@@ -1,20 +1,13 @@
 #' @include Forecaster.R
 #'
-#' @title Zero Forecaster
+#' Zero Forecaster
 #'
-#' @description Return 0 (and then adjust). Inherits \code{\link{Forecaster}}
-ZeroForecaster = setRefClass(
-       Class = "ZeroForecaster",
-       contains = "Forecaster",
+#' Return 0 (and then adjust). Inherits \code{\link{Forecaster}}
+ZeroForecaster = R6::R6Class("ZeroForecaster",
+       inherit = "Forecaster",
 
-       methods = list(
-               initialize = function(...)
-               {
-                       callSuper(...)
-               },
+       public = list(
                predictShape = function(today, memory, horizon, ...)
-               {
                        rep(0., horizon)
-               }
        )
 )
index 274f50e..b68b6c9 100644 (file)
@@ -1,6 +1,6 @@
-#' @title Forecast
+#' Forecast
 #'
-#' @description Forecast encapsulation
+#' Forecast encapsulation
 #'
 #' @field pred List with
 #' \itemize{
@@ -9,51 +9,99 @@
 #'   \item index: corresponding index in data object
 #' }
 #'
-#' @exportClass Forecast
-#' @export Forecast
-Forecast = setRefClass(
-       Class = "Forecast",
-
-       fields = list(
-               pred = "list"
+#' @docType class
+#' @importFrom R6 R6Class
+#'
+#' @export
+Forecast = R6::R6Class("Forecast",
+       private = list(
+               .pred = "list",
+               .dates = "Date"
        ),
-
-       methods = list(
-               initialize = function(...)
-               {
-                       "Initialize empty Forecast object"
-
-                       callSuper(...)
-               },
+       public = list(
+               initialize = function(dates)
+                       initialize(self, private, dates)
+               ,
                getSize = function()
-               {
-                       "Number of individual forecasts"
-
-                       length(pred)
-               },
+                       getSize(private)
+               ,
                append = function(new_serie, new_params, new_index)
-               {
-                       "Obtain a new pair (serie, params)"
-
-                       pred[[length(pred)+1]] <<- list("serie"=new_serie, "params"=new_params, "index"=new_index)
-               },
+                       append(private, new_serie, new_params, new_index)
+               ,
+               getDates = function()
+                       getDates(private)
+               ,
                getSerie = function(index)
-               {
-                       "Serie values at specified index"
-
-                       pred[[index]]$serie
-               },
+                       getSerie(private, index)
+               ,
                getParams = function(index)
-               {
-                       "Params at specified index"
-
-                       pred[[index]]$params
-               },
+                       getParams(private, index)
+               ,
                getIndexInData = function(index)
-               {
-                       "(day) Index in data where prediction took place"
-
-                       pred[[index]]$index
-               }
+                       getIndexInData(private, index)
        )
 )
+
+#' Initialize empty Forecast object
+#'
+#' @param o Object of class Forecast
+#' @param private List of private members in o
+#' @param dates vector of dates where forecast occurs
+initialize = function(o, private, dates)
+{
+       private$.dates <<- dates
+       private$.pred <<- list()
+       invisible(o)
+}
+
+#' Number of individual forecasts"
+#'
+#' @inheritParams initialize
+getSize = function(private)
+       length(private$.pred)
+
+#' Obtain a new pair (serie, params)"
+#'
+#' @inheritParams initialize
+append = function(new_serie, new_params, new_index_in_data)
+{
+       private$.pred[[length(private$.pred)+1]] <<-
+               list("serie"=new_serie, "params"=new_params, "index_in_data"=new_index_in_data)
+}
+
+#' Dates where prediction occurs
+#'
+#' inheritParams initialize
+getDates = function(private)
+       private$.dates
+
+#' Serie values at specified index"
+#'
+#' @inheritParams initialize
+#' @param index Return value at this index
+getSerie = function(index)
+{
+       if (is(index,"Date"))
+               index = match(index, private$.dates)
+       private$.pred[[index]]$serie
+}
+
+#' Params at specified index"
+#'
+#' @inheritParams getSerie
+getParams = function(index)
+{
+       if (is(index,"Date"))
+               index = match(index, private$.dates)
+       private$.pred[[index]]$params
+}
+
+#' (day) Index in data where prediction took place"
+#'
+#' @inheritParams getSerie
+getIndexInData = function(index)
+{
+       if (is(index,"Date"))
+               index = match(index, private$.dates)
+       private$.pred[[index]]$index_in_data
+}
index d5d5280..4c8437e 100644 (file)
@@ -1,50 +1,71 @@
-#' @title Forecaster (abstract class)
+#' Forecaster
 #'
-#' @description Abstract class to represent a forecaster (they all inherit this)
+#' Forecaster (abstract class, implemented by all forecasters)
 #'
 #' @field params List of computed parameters, for post-run analysis (dev)
 #' @field data Dataset, object of class Data
 #' @field pjump Function: how to predict the jump at day interface ?
-Forecaster = setRefClass(
-       Class = "Forecaster",
-
-       fields = list(
-               params = "list",
-               data = "Data",
-               pjump = "function"
+#'
+#' @docType class
+#' @importFrom R6 R6Class
+Forecaster = R6::R6Class("Forecaster",
+       private = list(
+               .params = "list",
+               .data = "Data",
+               .pjump = "function"
        ),
-
-       methods = list(
-               initialize = function(...)
-               {
-                       "Initialize (generic) Forecaster object"
-
-                       callSuper(...)
-                       if (!hasArg(data))
-                               stop("Forecaster must be initialized with a Data object")
-                       params <<- list()
-               },
-               predict = function(today, memory, horizon, ...)
-               {
-                       "Obtain a new forecasted time-serie"
-
-                       # Parameters (potentially) computed during shape prediction stage
-                       predicted_shape = predictShape(today, memory, horizon, ...)
-                       predicted_delta = pjump(data, today, memory, horizon, params, ...)
-                       # Predicted shape is aligned it on the end of current day + jump
-                       predicted_shape + tail(data$getSerie(today),1) - predicted_shape[1] + predicted_delta
-               },
+       public = list(
+               initialize = function(data, pjump)
+                       initialize(self, private, data, pjump)
+               ,
+               predictSerie = function(today, memory, horizon, ...)
+                       predictSerie(private, today, memory, horizon, ...)
+               ,
                predictShape = function(today, memory, horizon, ...)
-               {
-                       "Shape prediction (centered curve)"
-
-                       #empty default implementation: to implement in inherited classes
-               },
+                       predictShape(private, today, memory, horizon, ...)
+               ,
                getParameters = function()
-               {
-                       "Get parameters list"
-
-                       params
-               }
+                       getParameters(private)
        )
 )
+
+#' Initialize (generic) Forecaster object
+#'
+#' @param o Object of class Forecaster
+#' @param private List of private members in o
+#' @param data Object of class Data
+#' @param pjump Function to predict jump
+initialize = function(o, private, data, pjump)
+{
+       .params <<- list()
+       .data <<- data
+       .pjump <<- pjump
+       invisible(o)
+}
+
+#' Obtain a new forecasted time-serie
+#'
+#' @inheritParams initialize
+#' @param today Index for current prediction
+#' @param memory Depth in data (in days)
+#' @param horizon Number of hours to forecast
+predictSerie = function(private, today, memory, horizon, ...)
+{
+       # Parameters (potentially) computed during shape prediction stage
+       predicted_shape = predictShape(today, memory, horizon, ...)
+       predicted_delta = private$.pjump(private$.data, today, memory, horizon, params, ...)
+       # Predicted shape is aligned it on the end of current day + jump
+       predicted_shape + tail(private$.data$getSerie(today),1) - predicted_shape[1] + predicted_delta
+}
+
+#' Shape prediction (centered curve)
+#'
+#' @inheritParams predictSerie
+predictShape = function(private, today, memory, horizon, ...)
+       #empty default implementation: to implement in inherited classes
+
+#' Get parameters list
+#'
+#' @inheritParams initialize
+getParameters = function(private)
+       private$.params
index 1a7d8ba..0585e1b 100644 (file)
 #' @param horizon Number of time steps to predict
 #' @param ... Additional parameters for the forecasting models
 #'
-#' @return An object of class Forecast
+#' @return A list with the following items
+#' \itemize{
+#'   \item serie: forecasted serie
+#'   \item params: corresponding list of parameters (weights, neighbors...)
+#'   \item index: corresponding index in data object
+#' }
 #'
 #' @examples
 #' ts_data = system.file("extdata","pm10_mesures_H_loc.csv",package="talweg")
@@ -38,7 +43,7 @@
 #'   #do_something_with_pred
 #' }}
 #' @export
-computeForecast = function(data, indices, forecaster, pjump=NULL,
+computeForecast = function(data, indices, forecaster, pjump,
        memory=Inf, horizon=data$getStdHorizon(), ...)
 {
        # (basic) Arguments sanity checks
@@ -52,21 +57,17 @@ computeForecast = function(data, indices, forecaster, pjump=NULL,
        if (!is.character(forecaster))
                stop("forecaster (name) should be of class character") #pjump could be NULL
 
-       pred = list()
-       forecaster = new(paste(forecaster,"Forecaster",sep=""), data=data,
-               pjump =
-                       if (is.null(pjump))
-                               function() {}
-                       else
-                               getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
+       pred = Forecast$new()
+       forecaster_class_name = getFromNamespace(paste(forecaster,"Forecaster",sep=""), "talweg")
+       forecaster = forecaster_class_name$new(data=data,
+               pjump = getFromNamespace(paste("get",pjump,"JumpPredict",sep=""), "talweg"))
        for (today in indices)
        {
-               #pred$append(...) is slow; TODO: use R6 class
-               pred[[length(pred)+1]] = list(
-                       "serie" = forecaster$predict(today, memory, horizon, ...),
-                       "params" = forecaster$getParameters(),
-                       "index" = today
+               pred$append(
+                       new_serie = forecaster$predictSerie(today, memory, horizon, ...),
+                       new_params = forecaster$getParameters(),
+                       new_index = today
                )
        }
-       new("Forecast",pred=pred)
+       pred
 }
index da4b459..df94895 100644 (file)
@@ -2,8 +2,7 @@
 #'
 #' @description Take in input data frames and/or files containing raw data, and timezones, and
 #'   output a Data object, roughly corresponding to a list where each cell contains all value
-#'   for one day (see \code{?Data}). Current limitation: series (in working_tz) must start at
-#'   right after midnight (to keep in sync with exogenous vars)
+#'   for one day (see \code{?Data}).
 #'
 #' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time,
 #'   second is value) or a CSV file
 #' @param predict_at When does the prediction take place ? Integer, in hours. Default: 0
 #' @param limit Number of days to extract (default: Inf, for "all")
 #'
-#' @return An object of class Data
+#' @return A list where data[[i]] contains
+#'   \itemize{
+#'     \item time: vector of times
+#'     \item centered_serie: centered serie
+#'     \item level: corresponding level
+#'     \item exo: exogenous variables
+#'     \item exo_hat: predicted exogenous variables
+#'   }
 #'
 #' @examples
 #' ts_data = read.csv(system.file("extdata","pm10_mesures_H_loc.csv",package="talweg"))
 #' exo_data = read.csv(system.file("extdata","meteo_extra_noNAs.csv",package="talweg"))
-#' getData(ts_data, exo_data, input_tz="Europe/Paris", working_tz="Europe/Paris", limit=150)
+#' data = getData(ts_data, exo_data, limit=120)
 #' @export
 getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%M",
        working_tz="GMT", predict_at=0, limit=Inf)
@@ -62,7 +68,7 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%
        line = 1 #index in PM10 file (24 lines for 1 cell)
        nb_lines = nrow(ts_df)
        nb_exos = ( ncol(exo_df) - 1 ) / 2
-       data = Data$new()
+       data = list()
        i = 1 #index of a cell in data
        while (line <= nb_lines)
        {
@@ -83,18 +89,22 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%
                exo_hat = as.data.frame( exo_df[i,(1+nb_exos+1):(1+2*nb_exos)] )
                level = mean(serie, na.rm=TRUE)
                centered_serie = serie - level
-               data$append(time, centered_serie, level, exo, exo_hat)
+               data[[i]] = list("time"=time, "centered_serie"=centered_serie, "level"=level,
+                       "exo"=exo, "exo_hat"=exo_hat)
                if (i >= limit)
                        break
                i = i + 1
        }
-       if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(1)))
-               data$removeFirst()
-       if (length(data$getCenteredSerie( data$getSize() )) <
-               length(data$getCenteredSerie( data$getSize()-1 )))
+       start = 1
+       end = length(data)
+       if (length(data[[1]]$centered_serie) < length(data[[2]]$centered_serie))
+               start = 2
+       if (length(data[[length(data)]]$centered_serie) <
+               length(data[[length(data)-1]]$centered_serie))
        {
-               data$removeLast()
+               end = end-1
        }
-
+       if (start>1 || end<length(data))
+               data = data[start:end]
        data
 }
index 5cb55b0..9a7a1ee 100644 (file)
@@ -63,21 +63,19 @@ computeFilaments <- function(data, index, limit=60, plot=TRUE)
        L = length(ref_serie)
 
        # Determine indices of no-NAs days followed by no-NAs tomorrows
-       first_day = ifelse(length(data$getCenteredSerie(1))<L, 2, 1)
-       fdays_indices = c()
-       for (i in first_day:(index-1))
+       fdays = c()
+       for (i in 1:(index-1))
        {
                if ( !any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1))) )
-                       fdays_indices = c(fdays_indices, i)
+                       fdays = c(fdays, i)
        }
 
-       distances = sapply(fdays_indices, function(i) {
+       distances = sapply(fdays, function(i) {
                sqrt( sum( (ref_serie - data$getCenteredSerie(i))^2 ) / L )
        })
        indices = sort(distances, index.return=TRUE)$ix[1:min(limit,length(distances))]
        yrange = quantile( c(ref_serie, sapply( indices, function(i) {
-               ii = fdays_indices[i]
-               serie = c(data$getCenteredSerie(ii), data$getCenteredSerie(ii+1))
+               serie = c(data$getCenteredSerie(fdays[i]), data$getCenteredSerie(fdays[i]+1))
                if (!all(is.na(serie)))
                        return (range(serie, na.rm=TRUE))
                c()
@@ -93,7 +91,7 @@ computeFilaments <- function(data, index, limit=60, plot=TRUE)
                par(mar=c(4.7,5,1,1), cex.axis=1.5, cex.lab=1.5, lwd=2)
                for ( i in seq_len(length(indices)+1) )
                {
-                       ii = ifelse(i<=length(indices), fdays_indices[ indices[plot_order[i]] ], index)
+                       ii = ifelse(i<=length(indices), fdays[ indices[plot_order[i]] ], index)
                        plot(c(data$getCenteredSerie(ii),data$getCenteredSerie(ii+1)),
                                ylim=yrange, type="l", col=colors[i],
                                xlab=ifelse(i==1,"Temps (en heures)",""), ylab=ifelse(i==1,"PM10 centrĂ©",""))
@@ -102,7 +100,7 @@ computeFilaments <- function(data, index, limit=60, plot=TRUE)
                }
                abline(v=24, lty=2, col=colors()[56])
        }
-       list("indices"=c(fdays_indices[ indices[plot_order] ],index), "colors"=colors)
+       list("indices"=c(fdays[ indices[plot_order] ],index), "colors"=colors)
 }
 
 #' @title Plot similarities
@@ -187,17 +185,8 @@ plotFbox <- function(data, filter=function(index) TRUE, plot_bivariate=TRUE)
        if (!requireNamespace("rainbow", quietly=TRUE))
                stop("Functional boxplot requires the rainbow package")
 
-       start_index = 1
-       end_index = data$getSize()
-       if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2)))
-       {
-               # Shifted start (7am, or 1pm, or...)
-               start_index = 2
-               end_index = data$getSize() - 1
-       }
-
        L = length(data$getCenteredSerie(2))
-       series_matrix = sapply(start_index:end_index, function(index) {
+       series_matrix = sapply(1:data$getSize(), function(index) {
                if (filter(index))
                        as.matrix(data$getSerie(index))
                else
@@ -256,19 +245,18 @@ plotRelativeVariability = function(data, indices, ...)
        ref_var = apply(ref_series, 2, sd)
 
        # Determine indices of no-NAs days followed by no-NAs tomorrows
-       first_day = ifelse(length(data$getCenteredSerie(1))<length(ref_series[1,]), 2, 1)
-       fdays_indices = c()
-       for (i in first_day:(tail(indices,1)-1))
+       fdays = c()
+       for (i in 1:(tail(indices,1)-1))
        {
                if ( !any(is.na(data$getSerie(i)) | is.na(data$getSerie(i+1))) )
-                       fdays_indices = c(fdays_indices, i)
+                       fdays = c(fdays, i)
        }
 
        # TODO: 3 == magic number
        random_var = matrix(nrow=3, ncol=48)
        for (mc in seq_len(nrow(random_var)))
        {
-               random_indices = sample(fdays_indices, length(indices))
+               random_indices = sample(fdays, length(indices))
                random_series = t( sapply(random_indices, function(i) {
                        c( data$getSerie(i), data$getSerie(i+1) )
                }) )
index de8e861..0918a32 100644 (file)
@@ -1,6 +1,6 @@
-#' @title dateIndexToInteger
+#' dateIndexToInteger
 #'
-#' @description Transform a (potential) date index into an integer (relative to data)
+#' Transform a (potential) date index into an integer (relative to data)
 #'
 #' @param index Date (or integer) index
 #' @param data Object of class \code{Data}
@@ -20,26 +20,15 @@ dateIndexToInteger = function(index, data)
                integerIndex <- round( as.numeric(
                        difftime(indexAsDate, as.Date(data$getTime(1)[1])) ) ) + 1
                if (integerIndex >= 1 && integerIndex <= data$getSize())
-               {
-                       #WARNING: if index>=2 && series start at date >0h, result must be shifted
-                       #Convention: if first date asked, return first matching index (i.e.: 1)
-                       shift = 0
-                       if (integerIndex >= 2)
-                       {
-                               date1 = as.POSIXlt(data$getTime(1)[1])
-                               date2 = as.POSIXlt(data$getTime(2)[1])
-                               shift = (date1$year==date2$year && date1$mon==date2$mon && date1$mday==date2$mday)
-                       }
-                       return (integerIndex + shift)
-               }
+                       return (integerIndex)
                stop("Date outside data range")
        }
        stop("Unrecognized index format")
 }
 
-#' @title integerIndexToDate
+#' integerIndexToDate
 #'
-#' @description Transform an integer index to date index (relative to data)
+#' Transform an integer index to date index (relative to data)
 #'
 #' @param index Date (or integer) index
 #' @param data Object of class \code{Data}
@@ -55,9 +44,9 @@ integerIndexToDate = function(index, data)
        as.Date( data$getTime(index)[1] )
 }
 
-#' @title getSimilarDaysIndices
+#' getSimilarDaysIndices
 #'
-#' @description Find similar days indices in the past
+#' Find similar days indices in the past
 #'
 #' @param index Day index (numeric or date)
 #' @param limit Maximum number of indices to return
@@ -101,3 +90,14 @@ getSimilarDaysIndices = function(index, limit, same_season)
 
        return ( days[1:min(limit,length(days))] )
 }
+
+#' getSerie
+#'
+#' Return a time-serie from its centered version + level
+#'
+#' @param data A list as returned by \code{getData}
+#' @param index The index to return
+#'
+#' @export
+getSerie = function(data, index)
+       data[[index]]$centered_serie + data[[index]]$level
index 795307c..cedb40e 100644 (file)
@@ -2,22 +2,43 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Loading required package: R6\n"
+     ]
+    }
+   ],
    "source": [
     "library(talweg)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ERROR",
+     "evalue": "Error in private$.data <<- list(): object 'private' not found\n",
+     "output_type": "error",
+     "traceback": [
+      "Error in private$.data <<- list(): object 'private' not found\nTraceback:\n",
+      "1. getData(ts_data, exo_data, input_tz = \"Europe/Paris\", working_tz = \"Europe/Paris\", \n .     predict_at = 7)",
+      "2. Data$new()",
+      "3. .subset2(public_bind_env, \"initialize\")(...)",
+      "4. initializeData(self, private)"
+     ]
+    }
+   ],
    "source": [
     "ts_data = read.csv(system.file(\"extdata\",\"pm10_mesures_H_loc.csv\",package=\"talweg\"))\n",
     "exo_data = read.csv(system.file(\"extdata\",\"meteo_extra_noNAs.csv\",package=\"talweg\"))\n",