[aggexp.git] / pkg / R / z_runAlgorithm.R

#' @include b_Algorithm.R

# Lookup table: short algorithm code (as accepted by runAlgorithm) -> name of
# the class handed to new() to build the algorithm object.
algoNameDictionary = list(
	"ew" = "ExponentialWeights",
	"kn" = "KnearestNeighbors",
	"ga" = "GeneralizedAdditive",
	"ml" = "MLpoly",
	"rt" = "RegressionTree",
	"rr" = "RidgeRegression",
	"sv" = "SVMclassif"
)

#' @title Simulate real-time prediction
#'
#' @description Run the algorithm coded by \code{shortAlgoName} on data specified by the \code{stations} argument.
#' Dates are replayed one at a time: the forecasts of the date are given to the algorithm,
#' a prediction is requested, then the measures of the date are given to the algorithm.
#'
#' @param shortAlgoName Short name of the algorithm (a single character string).
#' \itemize{
#'   \item ew : Exponential Weights
#'   \item ga : Generalized Additive Model
#'   \item kn : K Nearest Neighbors
#'   \item ml : MLpoly
#'   \item rt : Regression Tree
#'   \item rr : Ridge Regression
#'   \item sv : SVMclassif
#' }
#' @param stations List of stations dataframes to consider.
#' @param experts Vector of experts to consider (names). Duplicates are removed.
#' @param ... Additional arguments to be passed to the Algorithm object.
#'
#' @return A list with the following slots
#' \itemize{
#'   \item data : data frame of all forecasts + measures (may contain NAs) + predictions, with date and station indices.
#'   \item algo : object of class \code{Algorithm} (or sub-class).
#'   \item stations : list of dataframes of stations for this run.
#'   \item experts : character vector of experts for this run.
#' }
#'
#' @examples
#' data(stations)
#' r = runAlgorithm("ew", list(st[[1]]), c("P","MA3"))
#' plotCurves(r)
#' r2 = runAlgorithm("ml", st[c(1,2)], c("MA3","MA10"))
#' plotError(r2)
#' @export
runAlgorithm = function(shortAlgoName, stations, experts, ...)
{
	#very basic input checks: exactly one known short name is required
	if (!is.character(shortAlgoName) || length(shortAlgoName) != 1
		|| ! shortAlgoName %in% names(algoNameDictionary))
	{
		stop("Unknown algorithm: ", paste(shortAlgoName, collapse=", "),
			" (expected one of: ", paste(names(algoNameDictionary), collapse=", "), ")")
	}
	experts = unique(experts)

	#get data == ordered date indices + forecasts + measures + stations indices (would be DB in prod)
	oracleData = getData(stations, experts)

	#the algorithm starts with an empty data frame having the same columns as the oracle data
	algoData = as.data.frame(matrix(nrow=0, ncol=ncol(oracleData)))
	names(algoData) = names(oracleData)
	algorithm = new(algoNameDictionary[[shortAlgoName]], data=algoData, ...)

	#the date index of the last row is used as the number of dates to replay
	#(relies on rows being ordered by date, with indices starting at 1)
	nbRows = nrow(oracleData)
	lastDate = if (nbRows > 0) oracleData[nbRows,"Date"] else 0
	isForecastColumn = (names(oracleData) != "Measure")

	#simulate incremental forecasts acquisition + prediction + get measure
	predictionsByDate = lapply(seq_len(lastDate), function(dateIndex) {
		#which() keeps rows in the order they appear and skips NA dates, like subset() does,
		#without resolving any name inside the data frame
		rowsAtDate = which(oracleData[,"Date"] == dateIndex)
		dateData = oracleData[rowsAtDate, , drop=FALSE]
		algorithm$inputNextForecasts(dateData[,isForecastColumn])
		prediction = algorithm$predict_withNA()
		algorithm$inputNextObservations(dateData[,"Measure"])
		prediction
	})

	#concatenate once, instead of growing a vector at every date
	predictions = do.call(c, predictionsByDate)
	if (is.null(predictions))
		predictions = numeric(0) #no date at all: still provide an (empty) Prediction column

	oracleData = cbind(oracleData, Prediction = predictions)
	return (list(data = oracleData, algo = algorithm, experts = experts, stations = stations))
}
Commit	Line	Data
b76a24cd BA	1	#' @include b_Algorithm.R
	2
	3	algoNameDictionary = list(
	4	ew = "ExponentialWeights",
	5	kn = "KnearestNeighbors",
	6	ga = "GeneralizedAdditive",
	7	ml = "MLpoly",
	8	rt = "RegressionTree",
	9	rr = "RidgeRegression",
	10	sv = "SVMclassif"
	11	)
	12
	13	#' @title Simulate real-time predict
	14	#'
	15	#' @description Run the algorithm coded by \code{shortAlgoName} on data specified by the \code{stations} argument.
	16	#'
	17	#' @param shortAlgoName Short name of the algorithm.
	18	#' \itemize{
	19	#' \item ew : Exponential Weights
	20	#' \item ga : Generalized Additive Model
	21	#' \item kn : K Nearest Neighbors
	22	#' \item ml : MLpoly
	23	#' \item rt : Regression Tree
	24	#' \item rr : Ridge Regression
	25	#' }
	26	#' @param stations List of stations dataframes to consider.
	27	#' @param experts Vector of experts to consider (names).
	28	#' @param ... Additional arguments to be passed to the Algorithm object.
	29	#'
	30	#' @return A list with the following slots
	31	#' \itemize{
	32	#' \item{data : data frame of all forecasts + measures (may contain NAs) + predictions, with date and station indices.}
	33	#' \item{algo : object of class \code{Algorithm} (or sub-class).}
	34	#' \item{stations : list of dataframes of stations for this run.}
	35	#' \item{experts : character vector of experts for this run.}
	36	#' }
	37	#'
	38	#' @examples
	39	#' data(stations)
	40	#' r = runAlgorithm("ew", list(st[[1]]), c("P","MA3"))
	41	#' plotCurves(r)
	42	#' r2 = runAlgorithm("ml", st[c(1,2)], c("MA3","MA10"))
	43	#' plotError(r2)
	44	#' @export
	45	runAlgorithm = function(shortAlgoName, stations, experts, ...)
	46	{
	47	#very basic input checks
	48	if (! shortAlgoName %in% names(algoNameDictionary))
	49	stop("Unknown algorithm:")
	50	experts = unique(experts)
	51
	52	#get data == ordered date indices + forecasts + measures + stations indices (would be DB in prod)
	53	oracleData = getData(stations, experts)
	54
	55	#simulate incremental forecasts acquisition + prediction + get measure
	56	algoData = as.data.frame(matrix(nrow=0, ncol=ncol(oracleData)))
	57	names(algoData) = names(oracleData)
	58	algorithm = new(algoNameDictionary[[shortAlgoName]], data=algoData, ...)
	59	predictions = c()
	60	T = oracleData[nrow(oracleData),"Date"]
	61	for (t in 1:T)
	62	{
	63	#NOTE: bet that subset extract rows in the order they appear
	64	tData = subset(oracleData, subset = (Date==t))
65	algorithm$inputNextForecasts(tData[,names(tData) != "Measure"])
66	predictions = c(predictions, algorithm$predict_withNA())
67	algorithm$inputNextObservations(tData[,"Measure"])
68	}
69
70	oracleData = cbind(oracleData, Prediction = predictions)
71	return (list(data = oracleData, algo = algorithm, experts = experts, stations = stations))
72	}