ed75454c0862de8e04ced1b96dfd443441912e53
[aggexp.git] / pkg / R / z_runAlgorithm.R
1 #' @include b_Algorithm.R
2
# Lookup table mapping the short algorithm codes accepted by runAlgorithm()
# to the class names that runAlgorithm() passes to new().
algoNameDictionary = as.list(c(
  ew = "ExponentialWeights",
  kn = "KnearestNeighbors",
  ga = "GeneralizedAdditive",
  ml = "MLpoly",
  rt = "RegressionTree",
  rr = "RidgeRegression",
  sv = "SVMclassif"
))
12
#' @title Simulate real-time prediction
#'
#' @description Run the algorithm coded by \code{shortAlgoName} on data specified by the \code{stations} argument.
#' Dates are processed one at a time, in increasing order: the forecasts of the date are given to the
#' algorithm, its prediction is collected, and only then the measures of that date are given to it.
#'
#' @param shortAlgoName Short name of the algorithm (a single character string).
#' \itemize{
#'   \item ew : Exponential Weights
#'   \item ga : Generalized Additive Model
#'   \item kn : K Nearest Neighbors
#'   \item ml : MLpoly
#'   \item rt : Regression Tree
#'   \item rr : Ridge Regression
#'   \item sv : SVMclassif
#' }
#' @param stations List of stations dataframes to consider.
#' @param experts Vector of experts to consider (names). Duplicated names are dropped.
#' @param ... Additional arguments to be passed to the Algorithm object.
#'
#' @return A list with the following slots
#' \itemize{
#'   \item data : data frame of all forecasts + measures (may contain NAs) + predictions, with date and station indices.
#'   \item algo : object of class \code{Algorithm} (or sub-class).
#'   \item experts : character vector of experts for this run.
#'   \item stations : list of dataframes of stations for this run.
#' }
#'
#' @examples
#' data(stations)
#' r = runAlgorithm("ew", list(st[[1]]), c("P","MA3"))
#' plotCurves(r)
#' r2 = runAlgorithm("ml", st[c(1,2)], c("MA3","MA10"))
#' plotError(r2)
#' @export
runAlgorithm = function(shortAlgoName, stations, experts, ...)
{
  #very basic input checks; && short-circuits, so %in% only runs on a single string
  isKnownName = is.character(shortAlgoName) && length(shortAlgoName) == 1 &&
    shortAlgoName %in% names(algoNameDictionary)
  if (!isKnownName)
  {
    stop("Unknown algorithm: ", paste(shortAlgoName, collapse=", "),
      " (expected one of: ", paste(names(algoNameDictionary), collapse=", "), ")")
  }
  experts = unique(experts)

  #get data == ordered date indices + forecasts + measures + stations indices (would be DB in prod)
  oracleData = getData(stations, experts)

  #the algorithm starts with an empty data frame having the same columns as the full data
  algoData = as.data.frame(matrix(nrow=0, ncol=ncol(oracleData)))
  names(algoData) = names(oracleData)
  algorithm = new(algoNameDictionary[[shortAlgoName]], data=algoData, ...)

  #simulate incremental forecasts acquisition + prediction + get measure
  #NOTE: relies on Date being indices 1..lastDate with rows ordered by date (last row = last date)
  nbRows = nrow(oracleData)
  lastDate = if (nbRows > 0) oracleData[nbRows, "Date"] else 0
  isForecastColumn = names(oracleData) != "Measure"
  predictionsByDate = vector("list", lastDate)
  for (currentDate in seq_len(lastDate))
  {
    #which() keeps rows in the order they appear and skips NA dates (as subset() would)
    rowsAtDate = which(oracleData[["Date"]] == currentDate)
    dateData = oracleData[rowsAtDate, , drop=FALSE]
    algorithm$inputNextForecasts(dateData[, isForecastColumn])
    datePrediction = algorithm$predict_withNA()
    #assigning NULL with [[<- would delete the list slot, hence the guard
    if (!is.null(datePrediction))
      predictionsByDate[[currentDate]] = datePrediction
    algorithm$inputNextObservations(dateData[, "Measure"])
  }
  #single concatenation, equivalent to appending with c() at each date
  predictions = do.call(c, predictionsByDate)

  oracleData = cbind(oracleData, Prediction = predictions)
  return (list(data = oracleData, algo = algorithm, experts = experts, stations = stations))
}