Initial commit
[aggexp.git] / R / z_runAlgorithm.R
1 #' @include b_Algorithm.R
2
# Lookup table from the short algorithm code accepted by runAlgorithm() to the
# name of the class that is instantiated for it. The order of the entries is
# kept as-is because names(algoNameDictionary) is used for validation.
algoNameDictionary = structure(
  list(
    "ExponentialWeights",
    "KnearestNeighbors",
    "GeneralizedAdditive",
    "MLpoly",
    "RegressionTree",
    "RidgeRegression",
    "SVMclassif"
  ),
  names = c("ew", "kn", "ga", "ml", "rt", "rr", "sv")
)
12
#' @title Simulate real-time predict
#'
#' @description Run the algorithm coded by \code{shortAlgoName} on the data
#' restricted to the requested \code{experts} and \code{stations}. Dates are
#' replayed one at a time: the forecasts of a date are given to the algorithm,
#' a prediction is requested, and only then are the measures of that date
#' given to the algorithm.
#'
#' @param shortAlgoName Short name of the algorithm (a single string).
#' \itemize{
#'   \item ew : Exponential Weights
#'   \item ga : Generalized Additive Model
#'   \item kn : K Nearest Neighbors
#'   \item ml : MLpoly
#'   \item rt : Regression Tree
#'   \item rr : Ridge Regression
#'   \item sv : SVMclassif
#' }
#' @param experts Vector of experts to consider (names or indices). Default: all of them.
#' @param stations Vector of stations to consider (names or indices). Default: all of them.
#' @param ... Additional arguments to be passed to the Algorithm object.
#'
#' @details An error is raised when \code{shortAlgoName} is unknown, when
#' \code{experts} or \code{stations} is neither character nor numeric, when
#' more distinct values are given than exist, when an index exceeds the number
#' of available experts/stations, or when a name is not found (the message
#' then lists every unknown name, one per line).
#'
#' @return A list with the following slots
#' \itemize{
#'   \item data : data frame of all forecasts + measures (may contain NAs) + predictions, with date and station indices.
#'   \item algo : object of class \code{Algorithm} (or sub-class).
#'   \item experts : character vector of experts for this run.
#'   \item stations : character vector of stations for this run.
#' }
#'
#' @export
runAlgorithm = function(shortAlgoName, experts=expertsArray, stations=stationsArray, ...)
{
  # Validate one selection (experts or stations) against the vector of all
  # known names, and return it as a character vector of unique names.
  # 'label' is only used to build the error messages.
  resolveSelection = function(selection, reference, label)
  {
    if (!is.character(selection) && !is.numeric(selection))
      stop(paste0("Wrong argument type: ", label, " should be character or integer"))
    selection = unique(selection)
    maxCount = length(reference)
    if (length(selection) > maxCount)
      stop(paste0("Too many ", label, " specified: at least one of them does not exist"))
    if (is.numeric(selection))
    {
      # An NA index would otherwise make the comparison below yield NA
      # and fail inside 'if' with an unrelated message.
      if (any(is.na(selection)))
        stop(paste("Some", label, "indices are missing (NA)"))
      if (any(selection > maxCount))
        stop(paste("Some", label, "indices are higher than the maximum which is", maxCount))
      return (reference[selection])
    }
    # Select the offending names directly from the selection itself
    unknownNames = selection[! selection %in% reference]
    if (length(unknownNames) > 0)
      stop(paste(paste0("Typo in ", label, " names:"), unknownNames, collapse="\n"))
    selection
  }

  #check, sanitize and format provided arguments
  if (length(shortAlgoName) != 1 || ! shortAlgoName %in% names(algoNameDictionary))
    stop(paste("Typo in short algo name:", paste(shortAlgoName, collapse=", ")))
  experts = resolveSelection(experts, expertsArray, "experts")
  stations = resolveSelection(stations, stationsArray, "stations")

  #get data == ordered date indices + forecasts + measures + stations indices (would be DB in prod)
  oracleData = getData(experts, stations)

  #the algorithm starts from an empty data frame having the same columns as the full data
  algoData = as.data.frame(matrix(nrow=0, ncol=ncol(oracleData)))
  names(algoData) = names(oracleData)
  algorithm = new(algoNameDictionary[[shortAlgoName]], data=algoData, ...)

  #simulate incremental forecasts acquisition + prediction + get measure
  #the date index of the last row is taken as the number of dates to replay
  lastDate = if (nrow(oracleData) > 0) oracleData[nrow(oracleData),"Date"] else 0
  predictionsByDate = vector("list", lastDate)
  for (dateIndex in seq_len(lastDate))
  {
    #NOTE: rows are extracted in the order they appear; which() drops NA
    #comparisons, exactly as subset() would
    tData = oracleData[which(oracleData[,"Date"] == dateIndex), , drop=FALSE]
    algorithm$inputNextForecasts(tData[,names(tData) != "Measure"])
    #wrap in list() so that a NULL prediction does not delete the list slot
    predictionsByDate[dateIndex] = list(algorithm$predict_withNA())
    algorithm$inputNextObservations(tData[,"Measure"])
  }

  oracleData = cbind(oracleData, Prediction = do.call(c, predictionsByDate))
  list(data = oracleData, algo = algorithm, experts = experts, stations = stations)
}