| 1 | #' @include b_Algorithm.R |
| 2 | |
# Lookup table: short algorithm code (as accepted by runAlgorithm) -> name of
# the class that is instantiated with new() for that code.
algoNameDictionary <- list(
  ew = "ExponentialWeights",
  kn = "KnearestNeighbors",
  ga = "GeneralizedAdditive",
  ml = "MLpoly",
  rt = "RegressionTree",
  rr = "RidgeRegression",
  sv = "SVMclassif"
)
| 12 | |
#' @title Simulate real-time predict
#'
#' @description Run the algorithm coded by \code{shortAlgoName} on data specified by the
#'   \code{stations} argument. The data is replayed one date index at a time: the
#'   forecasts of that date are handed to the algorithm, a prediction is requested,
#'   and only then are the corresponding measures revealed.
#'
#' @param shortAlgoName Short name of the algorithm (a single character string).
#' \itemize{
#'   \item ew : Exponential Weights
#'   \item ga : Generalized Additive Model
#'   \item kn : K Nearest Neighbors
#'   \item ml : MLpoly
#'   \item rt : Regression Tree
#'   \item rr : Ridge Regression
#'   \item sv : SVMclassif
#' }
#' @param stations List of stations dataframes to consider.
#' @param experts Vector of experts to consider (names). Duplicated names are dropped.
#' @param ... Additional arguments to be passed to the Algorithm object.
#'
#' @return A list with the following slots
#' \describe{
#'   \item{data}{data frame of all forecasts + measures (may contain NAs) + predictions,
#'     with date and station indices.}
#'   \item{algo}{object of class \code{Algorithm} (or sub-class).}
#'   \item{experts}{character vector of experts for this run.}
#'   \item{stations}{list of dataframes of stations for this run.}
#' }
#'
#' @examples
#' data(stations)
#' r = runAlgorithm("ew", list(st[[1]]), c("P","MA3"))
#' plotCurves(r)
#' r2 = runAlgorithm("ml", st[c(1,2)], c("MA3","MA10"))
#' plotError(r2)
#' @export
runAlgorithm <- function(shortAlgoName, stations, experts, ...) {
  # Very basic input checks: the code must be one known string, otherwise the
  # `[[` lookup below would fail with a much less helpful message.
  isKnownName <- is.character(shortAlgoName) && length(shortAlgoName) == 1 &&
    shortAlgoName %in% names(algoNameDictionary)
  if (!isKnownName) {
    stop("Unknown algorithm: ", toString(shortAlgoName))
  }
  experts <- unique(experts)

  # Get data == ordered date indices + forecasts + measures + stations indices
  # (would be DB in prod).
  oracleData <- getData(stations, experts)

  # The algorithm starts from an empty data frame that only shares the column
  # names of the full data set.
  algoData <- as.data.frame(matrix(nrow = 0, ncol = ncol(oracleData)))
  names(algoData) <- names(oracleData)
  algorithm <- new(algoNameDictionary[[shortAlgoName]], data = algoData, ...)

  # The last row carries the highest date index.
  # NOTE(review): assumes "Date" holds integer indices 1..n in increasing
  # order -- confirm against getData.
  nRows <- nrow(oracleData)
  nSteps <- if (nRows > 0) oracleData[nRows, "Date"] else 0

  # Simulate incremental forecasts acquisition + prediction + get measure.
  # Predictions are collected per step and concatenated once at the end instead
  # of growing a vector inside the loop.
  stepPredictions <- vector("list", nSteps)
  for (step in seq_len(nSteps)) {
    # which() keeps the rows in their original order and ignores NA dates.
    stepRows <- oracleData[which(oracleData[["Date"]] == step), , drop = FALSE]
    algorithm$inputNextForecasts(
      stepRows[, names(stepRows) != "Measure", drop = FALSE]
    )
    # `[<-` with list() so that a NULL prediction cannot delete the slot.
    stepPredictions[step] <- list(algorithm$predict_withNA())
    algorithm$inputNextObservations(stepRows[, "Measure"])
  }
  predictions <- do.call(c, stepPredictions)
  if (is.null(predictions) && nRows == 0) {
    # Keep the Prediction column in the result even when there is no data.
    predictions <- numeric(0)
  }

  oracleData <- cbind(oracleData, Prediction = predictions)
  list(data = oracleData, algo = algorithm, experts = experts, stations = stations)
}