From b76a24cd3444299e154dda153fa9392f13adf0ed Mon Sep 17 00:00:00 2001 From: Benjamin Auder Date: Mon, 22 Jan 2018 20:43:56 +0100 Subject: [PATCH] First commit --- .gitignore | 10 +++ README.md | 18 +++++ TODO | 2 + pkg/DESCRIPTION | 39 +++++++++ pkg/LICENSE | 22 +++++ pkg/NAMESPACE | 13 +++ pkg/R/A_NAMESPACE.R | 3 + pkg/R/b_Algorithm.R | 111 +++++++++++++++++++++++++ pkg/R/b_LinearAlgorithm.R | 65 +++++++++++++++ pkg/R/d_dataset.R | 28 +++++++ pkg/R/m_ExponentialWeights.R | 51 ++++++++++++ pkg/R/m_GeneralizedAdditive.R | 42 ++++++++++ pkg/R/m_KnearestNeighbors.R | 48 +++++++++++ pkg/R/m_MLPoly.R | 51 ++++++++++++ pkg/R/m_RegressionTree.R | 36 +++++++++ pkg/R/m_RidgeRegression.R | 49 +++++++++++ pkg/R/m_SVMclassif.R | 47 +++++++++++ pkg/R/z_getData.R | 28 +++++++ pkg/R/z_plot.R | 148 ++++++++++++++++++++++++++++++++++ pkg/R/z_plotHelper.R | 100 +++++++++++++++++++++++ pkg/R/z_runAlgorithm.R | 72 +++++++++++++++++ pkg/R/z_util.R | 49 +++++++++++ pkg/data/stations.RData | Bin 0 -> 6874 bytes pkg/man/aggexp-package.Rd | 38 +++++++++ pkg/src/ew.predict_noNA.c | 69 ++++++++++++++++ pkg/src/ml.predict_noNA.c | 64 +++++++++++++++ 26 files changed, 1203 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 TODO create mode 100644 pkg/DESCRIPTION create mode 100644 pkg/LICENSE create mode 100644 pkg/NAMESPACE create mode 100644 pkg/R/A_NAMESPACE.R create mode 100644 pkg/R/b_Algorithm.R create mode 100644 pkg/R/b_LinearAlgorithm.R create mode 100644 pkg/R/d_dataset.R create mode 100644 pkg/R/m_ExponentialWeights.R create mode 100644 pkg/R/m_GeneralizedAdditive.R create mode 100644 pkg/R/m_KnearestNeighbors.R create mode 100644 pkg/R/m_MLPoly.R create mode 100644 pkg/R/m_RegressionTree.R create mode 100644 pkg/R/m_RidgeRegression.R create mode 100644 pkg/R/m_SVMclassif.R create mode 100644 pkg/R/z_getData.R create mode 100644 pkg/R/z_plot.R create mode 100644 pkg/R/z_plotHelper.R create mode 100644 pkg/R/z_runAlgorithm.R create mode 
100644 pkg/R/z_util.R create mode 100644 pkg/data/stations.RData create mode 100644 pkg/man/aggexp-package.Rd create mode 100644 pkg/src/ew.predict_noNA.c create mode 100644 pkg/src/ml.predict_noNA.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8cfbb70 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +.RData +!/pkg/data/*.RData +.Rhistory +.ipynb_checkpoints/ +*.so +*.o +*.swp +*~ +/pkg/man/* +!/pkg/man/aggexp-package.Rd diff --git a/README.md b/README.md new file mode 100644 index 0000000..b15de94 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# Experts aggregation for air quality forecasting + +Joint work with [Jean-Michel Poggi](http://www.math.u-psud.fr/~poggi/) and [Bruno Portier](http://lmi2.insa-rouen.fr/~bportier/) + +--- + +This project gathers public material of a contract with [AirNormand](http://www.airnormand.fr/), located in Normandie (France). +This institute is in charge of monitoring and forecasting the air quality in its region. +Private parts (intermediate reports, custom code) were stripped. + +Several forecasting models are available, but it is difficult to choose one and discard the others, because +the performances vary significantly over time. +Therefore, the main goal of our study is to experiment several rules of experts (sequential) aggregation, and +compare the performances against individual forecasters and some oracles. + +--- + +The final report may be found at [this location](http://www.airnormand.fr/Publications/Publications-telechargeables/Rapports-d-etudes) diff --git a/TODO b/TODO new file mode 100644 index 0000000..196c62a --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +Clarify what ridge method is really doing. 
+Améliorer / augmenter doc diff --git a/pkg/DESCRIPTION b/pkg/DESCRIPTION new file mode 100644 index 0000000..f38407a --- /dev/null +++ b/pkg/DESCRIPTION @@ -0,0 +1,39 @@ +Package: aggexp +Title: aggexp : AGGregation of EXPerts to forecast time-series +Version: 0.2-3 +Description: As the title suggests, past predictions of a set of given experts + are aggregated until time t to predict at time t+1, (generally) as a weighted + sum of values at time t. Several weights optimization algorithms are compared: + exponential weights, MLPoly, and some classical statistical learning procedures + (Ridge, SVM...). +Author: Benjamin Auder [aut,cre], + Jean-Michel Poggi [ctb], + Bruno Portier [ctb] +Maintainer: Benjamin Auder +Depends: + R (>= 3.0) +Suggests: + gam, + tree, + kernlab +LazyData: yes +URL: http://git.auder.net/?p=aggexp.git +License: MIT + file LICENSE +Collate: + 'A_NAMESPACE.R' + 'z_util.R' + 'b_Algorithm.R' + 'b_LinearAlgorithm.R' + 'd_dataset.R' + 'm_ExponentialWeights.R' + 'm_GeneralizedAdditive.R' + 'm_KnearestNeighbors.R' + 'm_MLPoly.R' + 'm_RegressionTree.R' + 'm_RidgeRegression.R' + 'm_SVMclassif.R' + 'z_getData.R' + 'z_runAlgorithm.R' + 'z_plotHelper.R' + 'z_plot.R' +RoxygenNote: 5.0.1 diff --git a/pkg/LICENSE b/pkg/LICENSE new file mode 100644 index 0000000..f02a780 --- /dev/null +++ b/pkg/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2014-2016, Benjamin AUDER + 2014-2016, Jean-Michel Poggi + 2014-2016, Bruno Portier + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial 
portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/pkg/NAMESPACE b/pkg/NAMESPACE new file mode 100644 index 0000000..766b75b --- /dev/null +++ b/pkg/NAMESPACE @@ -0,0 +1,13 @@ +# Generated by roxygen2: do not edit by hand + +export(getBestConvexCombination) +export(getBestExpert) +export(getBestLinearCombination) +export(getData) +export(getIndicators) +export(plotCloud) +export(plotCurves) +export(plotError) +export(plotRegret) +export(runAlgorithm) +useDynLib(aggexp) diff --git a/pkg/R/A_NAMESPACE.R b/pkg/R/A_NAMESPACE.R new file mode 100644 index 0000000..4651887 --- /dev/null +++ b/pkg/R/A_NAMESPACE.R @@ -0,0 +1,3 @@ +#' @useDynLib aggexp +#' +NULL diff --git a/pkg/R/b_Algorithm.R b/pkg/R/b_Algorithm.R new file mode 100644 index 0000000..3ff9cc9 --- /dev/null +++ b/pkg/R/b_Algorithm.R @@ -0,0 +1,111 @@ +#' @include z_util.R + +#' @title Algorithm +#' +#' @description Generic class to represent an algorithm +#' +#' @field H The window [t-H+1, t] considered for prediction at time step t+1 +#' @field data Data frame of the last H experts forecasts + observations. +#' +Algorithm = setRefClass( + Class = "Algorithm", + + fields = list( + H = "numeric", + data = "data.frame" + ), + + methods = list( + initialize = function(...) + { + "Initialize (generic) Algorithm object" + + callSuper(...) 
+ if (length(H) == 0 || H < 1) + H <<- Inf + }, + inputNextForecasts = function(x) + { + "Obtain a new series of vectors of experts forecasts (1 to K)" + + nd = nrow(data) + nx = nrow(x) + indices = (nd+1):(nd+nx) + + appendedData = as.data.frame(matrix(nrow=nx, ncol=ncol(data), NA)) + names(appendedData) = names(data) + data <<- rbind(data, appendedData) + data[indices,names(x)] <<- x + }, + inputNextObservations = function(y) + { + "Obtain the observations corresponding to last input forecasts" + + #if all experts made a large unilateral error and prediction is very bad, remove data + n = nrow(data) + lastTime = data[n,"Date"] + xy = subset(data, subset=(Date == lastTime)) + xy[,"Measure"] = y + x = xy[,names(xy) != "Measure"] + y = xy[,"Measure"] + ranges = apply(x-y, 1, range) + predictableIndices = (ranges[2,] > -MAX_ERROR & ranges[1,] < MAX_ERROR) +# predictableIndices = 1:length(y) + data <<- data[1:(n-nrow(xy)),] + data <<- rbind(data, xy[predictableIndices,]) + + #oldest rows are removed to prevent infinitely growing memory usage, + #or to allow a window effect (parameter H) + delta = nrow(data) - min(H, MAX_HISTORY) + if (delta > 0) + data <<- data[-(1:delta),] + }, + predict_withNA = function() + { + "Predict observations corresponding to the last input forecasts. 
Potential NAs" + + n = nrow(data) + if (data[n,"Date"] == 1) + { + #no measures added so far + return (rep(NA, n)) + } + + nx = n - nrow(subset(data, subset = (Date == data[n,"Date"]))) + x = data[(nx+1):n, !names(data) %in% c("Date","Measure","Station")] + experts = names(x) + prediction = c() + + #extract a maximal submatrix of data without NAs + + iy = getNoNAindices(x, 2) + if (!any(iy)) + { + #all columns of x have at least one NA + return (rep(NA, n-nx)) + } + + data_noNA = data[1:nx,c(experts[iy], "Measure")] + ix = getNoNAindices(data_noNA) + if (!any(ix)) + { + #no full line with NA-pattern similar to x[,iy] + return (rep(NA, n-nx)) + } + + data_noNA = data_noNA[ix,] + xiy = as.data.frame(x[,iy]) + names(xiy) = names(x)[iy] + res = predict_noNA(data_noNA, xiy) + #basic sanitization: force all values >=0 + res[res < 0.] = 0. + return (res) + }, + predict_noNA = function(XY, x) + { + "Predict observations corresponding to x. No NAs" + + #empty default implementation: to implement in inherited classes + } + ) +) diff --git a/pkg/R/b_LinearAlgorithm.R b/pkg/R/b_LinearAlgorithm.R new file mode 100644 index 0000000..960b067 --- /dev/null +++ b/pkg/R/b_LinearAlgorithm.R @@ -0,0 +1,65 @@ +#' @include b_Algorithm.R + +#' @title Linear Algorithm +#' +#' @description Generic class to represent a linear algorithm. +#' TODO: not needed in production environment; weights growing infinitely. +#' Inherits \code{\link{Algorithm}} +#' +#' @field weights The matrix of weights (in rows) associated to each expert (in columns) +#' +LinearAlgorithm = setRefClass( + Class = "LinearAlgorithm", + + fields = c( + weights = "matrix" + ), + + contains = "Algorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) 
+ weights <<- matrix(nrow=0, ncol=ncol(data)-3) + }, + + appendWeight = function(weight) + { + "Append the last computed weights to the weights matrix, for further plotting" + + n = nrow(data) + nx = n - nrow(subset(data, subset = (Date == data[n,"Date"]))) + x = data[(nx+1):n, !names(data) %in% c("Date","Measure","Station")] + iy = getNoNAindices(x, 2) + + completedWeight = rep(NA, ncol(x)) + completedWeight[iy] = weight + weights <<- rbind(weights, completedWeight) + }, + + plotWeights = function(station=1, start=1, ...) + { + "Plot the weights of each expert over time" + + if (is.character(station)) + station = match(station, stations) + + #keep only full weights (1 to K) + weights_ = weights[getNoNAindices(weights),] + weights_ = weights_[start:nrow(weights_),] + + yRange = range(weights_, na.rm=TRUE) + K = ncol(weights_) + cols = rainbow(K) + par(mar=c(5,4.5,1,1), cex=1.5) + for (i in 1:K) + { + plot(weights_[,i], type="l", xaxt="n", ylim=yRange, col=cols[i], xlab="", ylab="",cex.axis=1.5, ...) 
+ par(new=TRUE) + } + axis(side=1, at=seq(from=1,to=nrow(weights_),by=30), labels=seq(from=0,to=nrow(weights_),by=30) + start, cex.axis=1.5) + title(xlab="Time",ylab="Weight", cex.lab=1.6) + } + ) +) diff --git a/pkg/R/d_dataset.R b/pkg/R/d_dataset.R new file mode 100644 index 0000000..6300284 --- /dev/null +++ b/pkg/R/d_dataset.R @@ -0,0 +1,28 @@ +#' Sample data built from DataMarket Rhine River time-series +#' +#' 3 "stations": original serie, reversed series, average of both.\cr +#' "Experts": persistence (P), moving average with window==3 (MA3) and 10 (MA10).\cr +#' -----\cr +#' Generating R code:\cr +#' library(rdatamarket)\cr +#' serie = dmseries("https://datamarket.com/data/set/22wp/rhine-river-near-basle-switzerland-1807-1957")\cr +#' dates = seq(as.Date("1807-07-01"),as.Date("1956-07-01"),"years")\cr +#' serie = list(serie, rev(serie), (serie+rev(serie))/2)\cr +#' st = list()\cr +#' for (i in 1:3) {\cr +#' st[[i]] = data.frame(\cr +#' Date=dates,\cr +#' P=c(NA,serie[[i]][1:149]),\cr +#' MA3=c(rep(NA,3),sapply(4:150, function(j) mean(serie[[i]][(j-3):(j-1)]) )),\cr +#' MA10=c(rep(NA,10),sapply(11:150, function(j) mean(serie[[i]][(j-10):(j-1)]) )),\cr +#' Measure=as.double(serie[[i]]) +#' )\cr +#' }\cr +#' save(st, file="stations.RData") +#' +#' @name stations +#' @docType data +#' @usage data(stations) +#' @references \url{https://datamarket.com/data/set/22wp/rhine-river-near-basle-switzerland-1807-1957} +#' @format A list of 3 dataframes with 150 rows and 5 columns: Date,P,MA3,MA10,Measure +NULL diff --git a/pkg/R/m_ExponentialWeights.R b/pkg/R/m_ExponentialWeights.R new file mode 100644 index 0000000..0916287 --- /dev/null +++ b/pkg/R/m_ExponentialWeights.R @@ -0,0 +1,51 @@ +#' @include b_LinearAlgorithm.R + +#' @title Exponential Weights Algorithm +#' +#' @description Exponential Weights Algorithm. +#' Inherits \code{\link{LinearAlgorithm}} +#' +#' @field alpha Importance of weights redistribution, in [0,1]. 
Default: 0 +#' @field grad Whether to use or not the (sub)gradient trick. Default: FALSE +#' +ExponentialWeights = setRefClass( + Class = "ExponentialWeights", + + fields = c( + alpha = "numeric", + grad = "logical" + ), + + contains = "LinearAlgorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) + if (length(alpha) == 0 || alpha < 0. || alpha > 1.) + alpha <<- 0. #no redistribution + if (length(grad) == 0) + grad <<- FALSE + }, + predict_noNA = function(XY, x) + { + K = ncol(XY) - 1 + if (K == 1) + { + #shortcut: nothing to combine + finalWeight = 1. + } + + else + { + X = XY[,names(XY) != "Measure"] + Y = XY[,"Measure"] + finalWeight = .C("ew_predict_noNA", X = as.double(t(X)), Y = as.double(Y), n = as.integer(nrow(XY)), + K = as.integer(K), alpha=as.double(alpha), grad = as.integer(grad), weight=double(K))$weight + } + + appendWeight(finalWeight) + return (matricize(x) %*% finalWeight) + } + ) +) diff --git a/pkg/R/m_GeneralizedAdditive.R b/pkg/R/m_GeneralizedAdditive.R new file mode 100644 index 0000000..5baf60b --- /dev/null +++ b/pkg/R/m_GeneralizedAdditive.R @@ -0,0 +1,42 @@ +#' @include b_Algorithm.R + +#' @title Generalized Additive Model +#' +#' @description Generalized Additive Model using the \code{gam} package. +#' Inherits \code{\link{Algorithm}} +#' +#' @field family Family of the distribution to be used. Default: gaussian(). +#' +GeneralizedAdditive = setRefClass( + Class = "GeneralizedAdditive", + + fields = c( + "family" #class "family" + ), + + contains = "Algorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) 
+ if (class(family) == "uninitializedField") + family <<- gaussian() + }, + predict_noNA = function(XY, x) + { + #GAM need some data to provide reliable results + if (nrow(XY) < 30) + { + X = XY[,names(XY) != "Measure"] + Y = XY[,"Measure"] + weight = ridgeSolve(X, Y, LAMBDA) + return (matricize(x) %*% weight) + } + + suppressPackageStartupMessages( require(gam) ) + g = gam(Measure ~ ., data=XY, family=family) + return (stats::predict(g, x)) + } + ) +) diff --git a/pkg/R/m_KnearestNeighbors.R b/pkg/R/m_KnearestNeighbors.R new file mode 100644 index 0000000..926b22b --- /dev/null +++ b/pkg/R/m_KnearestNeighbors.R @@ -0,0 +1,48 @@ +#' @include b_Algorithm.R + +#' @title K Nearest Neighbors Algorithm +#' +#' @description K Nearest Neighbors Algorithm. +#' Inherits \code{\link{Algorithm}} +#' +#' @field k Number of neighbors to consider. Default: \code{n^(2/3)} +#' +KnearestNeighbors = setRefClass( + Class = "KnearestNeighbors", + + fields = c( + k = "numeric" + ), + + contains = "Algorithm", + + methods = list( + predictOne = function(X, Y, x) + { + "Find the neighbors of one row, and solve a constrained linear system to obtain weights" + + distances = sqrt(apply(X, 1, function(z)(return (sum((z-x)^2))))) + rankedHistory = sort(distances, index.return=TRUE) + n = length(Y) + k_ = ifelse(length(k) == 0 || k <= 0. 
|| k > n, getKnn(n), as.integer(k)) + weight = ridgeSolve(matricize(X[rankedHistory$ix[1:k_],]), Y[rankedHistory$ix[1:k_]], LAMBDA) + + return (sum(x * weight)) + }, + predict_noNA = function(XY, x) + { + X = XY[,names(XY) != "Measure"] + K = ncol(XY) - 1 + if (K == 1) + X = as.matrix(X) + else if (length(XY[["Measure"]]) == 1) + X = t(as.matrix(X)) + Y = XY[,"Measure"] + x = matricize(x) + res = c() + for (i in 1:nrow(x)) + res = c(res, predictOne(X, Y, x[i,])) + return (res) + } + ) +) diff --git a/pkg/R/m_MLPoly.R b/pkg/R/m_MLPoly.R new file mode 100644 index 0000000..a19a2c9 --- /dev/null +++ b/pkg/R/m_MLPoly.R @@ -0,0 +1,51 @@ +#' @include b_LinearAlgorithm.R + +#' @title MLpoly Algorithm +#' +#' @description MLpoly Algorithm. +#' Inherits \code{\link{LinearAlgorithm}} +#' +#' @field alpha Importance of weights redistribution, in [0,1]. Default: 0 +#' @field grad Whether to use or not the (sub)gradient trick. Default: FALSE +#' +MLpoly = setRefClass( + Class = "MLpoly", + + fields = c( + alpha = "numeric", + grad = "logical" + ), + + contains = "LinearAlgorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) + if (length(alpha) == 0 || alpha < 0. || alpha > 1.) + alpha <<- 0. #no redistribution + if (length(grad) == 0) + grad <<- FALSE + }, + predict_noNA = function(XY, x) + { + K = ncol(XY) - 1 + if (K == 1) + { + #shortcut: nothing to combine + finalWeight = 1. 
+ } + + else + { + X = XY[,names(XY) != "Measure"] + Y = XY[,"Measure"] + finalWeight = .C("ml_predict_noNA", X = as.double(t(X)), Y = as.double(Y), n = as.integer(nrow(XY)), + K = as.integer(K), alpha=as.double(alpha), grad = as.integer(grad), weight=double(K))$weight + } + + appendWeight(finalWeight) + return (matricize(x) %*% finalWeight) + } + ) +) diff --git a/pkg/R/m_RegressionTree.R b/pkg/R/m_RegressionTree.R new file mode 100644 index 0000000..d51e408 --- /dev/null +++ b/pkg/R/m_RegressionTree.R @@ -0,0 +1,36 @@ +#' @include b_Algorithm.R + +#' @title Regression Tree +#' +#' @description Regression Tree using the \code{tree} package. +#' Inherits \code{\link{Algorithm}} +#' +#' @field nleaf Number of leaf nodes after pruning. Default: Inf (no pruning) +#' +RegressionTree = setRefClass( + Class = "RegressionTree", + + fields = c( + nleaf = "numeric" + ), + + contains = "Algorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) + if (length(nleaf) == 0 || nleaf < 1) + nleaf <<- Inf + }, + predict_noNA = function(XY, x) + { + require(tree, quietly=TRUE) + rt = tree(Measure ~ ., data=XY) + treeSize = sum( rt$frame[["var"]] == "" ) + if (treeSize > nleaf) + rt = prune.tree(rt, best = nleaf) + return (stats::predict(rt, as.data.frame(x))) + } + ) +) diff --git a/pkg/R/m_RidgeRegression.R b/pkg/R/m_RidgeRegression.R new file mode 100644 index 0000000..020894d --- /dev/null +++ b/pkg/R/m_RidgeRegression.R @@ -0,0 +1,49 @@ +#' @include b_LinearAlgorithm.R + +#' @title Ridge Regression Algorithm +#' +#' @description Ridge Regression Algorithm. +#' Inherits \code{\link{LinearAlgorithm}} +#' +#' @field lambda Value of lambda (let undefined for cross-validation). Default: undefined +#' @field lambdas Vector of "optimal" lambda values over time. 
TODO: remove for production +#' +RidgeRegression = setRefClass( + Class = "RidgeRegression", + + fields = c( + lambda = "numeric", + lambdas = "numeric" + ), + + contains = "LinearAlgorithm", + + methods = list( + predict_noNA = function(XY, x) + { + if (length(lambda) > 0 || nrow(XY) < 30) #TODO: magic number + { + #simple ridge regression with fixed lambda (not enough history for CV) + X = matricize(XY[,names(XY) != "Measure"]) + Y = XY[,"Measure"] + lambda_ = ifelse(length(lambda) > 0, lambda, LAMBDA) + weight = ridgeSolve(X, Y, lambda_) + } + + else + { + #enough data for cross-validations + require(MASS, quietly=TRUE) + gridLambda = seq(0.05,5.05,0.1) + res_lmr = lm.ridge(Measure ~ . + 0, data=XY, lambda = gridLambda) + lambda_ = res_lmr$lambda[which.min(res_lmr$GCV)] + weight = as.matrix(coef(res_lmr))[which.min(res_lmr$GCV),] + } + + lambdas <<- c(lambdas, lambda_) + + appendWeight(weight) + return (matricize(x) %*% weight) + } + ) +) diff --git a/pkg/R/m_SVMclassif.R b/pkg/R/m_SVMclassif.R new file mode 100644 index 0000000..30e9a2b --- /dev/null +++ b/pkg/R/m_SVMclassif.R @@ -0,0 +1,47 @@ +#' @include b_Algorithm.R + +#' @title SVM Algorithm +#' +#' @description SVM classifier. +#' Inherits \code{\link{Algorithm}} +#' +#' @field kernel TODO +#' @field someParam TODO +#' +SVMclassif = setRefClass( + Class = "SVMclassif", + + fields = c( + kernel = "numeric", + someParam = "logical" + ), + + contains = "Algorithm", + + methods = list( + initialize = function(...) + { + callSuper(...) 
+ #TODO + }, + predict_noNA = function(XY, x) + { + if (nrow(XY) <= 5) + return (10) #TODO + + require(kernlab, quietly=TRUE) + XY[,"alert"] = XY[,"Measure"] > 30 + alertsIndices = XY[,"alert"] + XY[alertsIndices,"alert"] = "alert" + XY[!alertsIndices,"alert"] = "noalert" + XY[,"alert"] = as.factor(XY[,"alert"]) + XY[,"Measure"] = NULL + + ks = ksvm(alert ~ ., data=XY) + pred = as.character(predict(ks, as.data.frame(x))) + pred[pred == "alert"] = 70 + pred[pred == "noalert"] = 10 + return (as.numeric(pred)) + } + ) +) diff --git a/pkg/R/z_getData.R b/pkg/R/z_getData.R new file mode 100644 index 0000000..43c458b --- /dev/null +++ b/pkg/R/z_getData.R @@ -0,0 +1,28 @@ +#' @title Get forecasts + observations +#' +#' @description Get forecasts of all specified experts for all specified stations, also with (ordered) dates and (unordered) stations indices. +#' +#' @param stations List of stations dataframes (as in the sample) +#' @param experts Names of the experts (as in dataframe header) +#' +#' @export +getData = function(stations, experts) +{ + data = as.data.frame(matrix(nrow=0, ncol=1 + length(experts) + 2)) + names(data) = c("Date", experts, "Measure", "Station") + for (i in 1:length(stations)) + { + #date index is sufficient; also add station index + stationInfo = cbind( + Date = 1:nrow(stations[[i]]), + stations[[i]] [,names(stations[[i]]) %in% experts], + Measure = stations[[i]][,"Measure"], + Station = i) + data = rbind(data, stationInfo) + } + + #extra step: order by date (would be a DB request) + data = data[order(data[,"Date"]),] + + return (data) +} diff --git a/pkg/R/z_plot.R b/pkg/R/z_plot.R new file mode 100644 index 0000000..9e94913 --- /dev/null +++ b/pkg/R/z_plot.R @@ -0,0 +1,148 @@ +#' @include z_plotHelper.R + +#' @title Plot forecasts/observations +#' +#' @description Plot the measures at one station versus all experts forecasts. +#' +#' @param r Output of \code{\link{runAlgorithm}}. +#' @param station Name or index of the station to consider. 
Default: the first one +#' @param interval Time interval for the plot. Default: all time range. +#' @param experts Subset of experts for the plot. Default: all experts. +#' @param ... Additional arguments to be passed to graphics::plot method. +#' +#' @export +plotCurves = function(r, station=1, interval=1:(nrow(r$data)/length(r$stations)), experts=r$experts, cols=rainbow(length(experts)), ...) +{ + if (is.character(station)) + station = match(station, r$stations) + if (is.numeric(experts)) + experts = r$experts[experts] + + XY = subset(r$data[interval,], subset = (Station == station), select = c(experts,"Measure")) + indices = getNoNAindices(XY) + XY = XY[indices,] + X = as.matrix(XY[,names(XY) %in% experts]) + Y = XY[,"Measure"] + + yRange = range(XY) + par(mar=c(5,4.5,1,1), cex=1.5) + for (i in 1:length(experts)) + { + plot(X[,i],ylim=yRange,type="l",lty="dotted",col=cols[i],xlab="",ylab="",xaxt="n",yaxt="n", lwd=2, ...) + par(new=TRUE) + } + plot(Y, type="l", ylim=yRange, xlab="", ylab="", lwd=2, cex.axis=1.5, ...) + title(xlab="Time",ylab="Forecasts / Measures", cex.lab=1.6) + legend("topright", lwd=c(2,1),lty=c("solid","dotted"),horiz=TRUE,legend=c("Measures","Forecasts")) +} + +#' @title Plot error +#' +#' @description Plot the absolute error over time at one station. +#' +#' @param r Output of \code{\link{runAlgorithm}}. +#' @param station Name or index of the station to consider. Default: the first one +#' @param start First index to consider (too much variability in early errors) +#' @param noNA TRUE to show only errors associated with full lines (old behavior) +#' @param ... Additional arguments to be passed to graphics::plot method. +#' +#' @export +plotError = function(r, station=1, start=1, noNA=TRUE, ...) 
+{ + if (is.character(station)) + station = match(station, r$stations) + + XY = subset(r$data, subset = (Station == station), select = c(r$experts,"Measure","Prediction")) + Y = XY[,"Measure"] + hatY = XY[,"Prediction"] + indices = !is.na(Y) & !is.na(hatY) + if (noNA) + { + X = XY[,names(XY) %in% r$experts] + indices = indices & getNoNAindices(X) + } + Y = Y[indices] + hatY = hatY[indices] + + error = abs(Y - hatY) + par(mar=c(5,4.5,1,1), cex=1.5) + plot(error, type="l", xaxt="n", xlab="Time",ylab="L1 error", cex.lab=1.6, cex.axis=1.5, ...) + axis(side=1, at=(seq(from=start,to=length(Y),by=30) - start), labels=seq(from=start,to=length(Y),by=30), cex.axis=1.5) +} + +#' @title Plot regret +#' +#' @description Plot the regret over time at one station. +#' +#' @param r Output of \code{\link{runAlgorithm}}. +#' @param vs Linear weights to compare with. Can be obtained by the \code{getBestXXX} methods, or by any other mean. +#' @param station Name or index of the station to consider. Default: the first one +#' @param start First index to consider (too much variability in early errors) +#' @param ... Additional arguments to be passed to graphics::plot method. +#' +#' @export +plotRegret = function(r, vs, station=1, start=1, ...) +{ + if (is.character(station)) + station = match(station, r$stations) + + XY = subset(r$data, subset = (Station == station), select = c(r$experts,"Measure","Prediction")) + X = XY[,names(XY) %in% r$experts] + Y = XY[,"Measure"] + hatY = XY[,"Prediction"] + + indices = !is.na(Y) & !is.na(hatY) & getNoNAindices(X) + X = as.matrix(X[indices,]) + Y = Y[indices] + hatY = hatY[indices] + + error2 = abs(Y - hatY)^2 + vsError2 = abs(Y - X %*% vs)^2 + cumErr2 = cumsum(error2) / seq_along(error2) + cumVsErr2 = cumsum(vsError2) / seq_along(vsError2) + regret = cumErr2 - cumVsErr2 + + par(mar=c(5,4.5,1,1), cex=1.5) + plot(regret, type="l", xaxt="n", xlab="Time", ylab="Regret", cex.lab=1.6, cex.axis=1.5, ...) 
+ abline(a=0., b=0., col=2) + axis(side=1, at=(seq(from=start,to=length(Y),by=30) - start), labels=seq(from=start,to=length(Y),by=30), cex.axis=1.5) +} + +#' @title Plot predicted/expected cloud +#' +#' @description Plot the cloud of forecasts/observations + statistical indicators. +#' +#' @param r Output of \code{\link{runAlgorithm}}. +#' @param thresh Threshold to consider for alerts (usually 30 or 50) +#' @param hintThresh thresholds to draw on the plot to help visualization. Often \code{c(30,50,80)} +#' @param station Name or index of the station to consider. Default: the first one +#' @param noNA TRUE to show only errors associated with full lines (old behavior) +#' @param ... Additional arguments to be passed to graphics::plot method. +#' +#' @export +plotCloud = function(r, thresh=30, hintThresh=c(30,50,80), station=1, noNA=TRUE, ...) +{ + if (is.character(station)) + station = match(station, r$stations) + + XY = subset(r$data, subset = (Station == station), select = c(r$experts,"Measure","Prediction")) + Y = XY[,"Measure"] + hatY = XY[,"Prediction"] + indices = !is.na(Y) & !is.na(hatY) + if (noNA) + { + X = XY[,names(XY) %in% r$experts] + indices = indices & getNoNAindices(X) + } + Y = Y[indices] + hatY = hatY[indices] + + indics = getIndicators(r, thresh, station, noNA) + + par(mar=c(5,5,3,2), cex=1.5) + plot(Y, hatY, xlab="Measured PM10", ylab="Predicted PM10", + cex.lab=1.6, cex.axis=1.5, xlim=c(0,120), ylim=c(0,120), ...) + abline(0,1,h=hintThresh,v=hintThresh,col=2,lwd=2) + legend("topleft",legend=paste("RMSE ",indics$RMSE)) + legend("bottomright",legend=c(paste("TS ",indics$TS))) +} diff --git a/pkg/R/z_plotHelper.R b/pkg/R/z_plotHelper.R new file mode 100644 index 0000000..f522f0f --- /dev/null +++ b/pkg/R/z_plotHelper.R @@ -0,0 +1,100 @@ +#' @include z_runAlgorithm.R + +#' @title Get best expert index +#' +#' @description Return the weights corresponding to the best expert (...0,1,0...) 
+#' +#' @param r Output of \code{\link{runAlgorithm}} +#' +#' @export +getBestExpert = function(r) +{ + X = as.matrix(r$data[,names(r$data) %in% r$experts]) + Y = r$data[,"Measure"] + + bestIndex = which.min(colMeans(abs(X - Y)^2, na.rm=TRUE)) + res = rep(0.0, length(r$experts)) + res[bestIndex] = 1.0 + return (res) +} + +#' @title Get best convex combination +#' +#' @description Return the weights p minimizing the quadratic error ||X*p-Y||^2 under convexity constraint. +#' +#' @param r Output of \code{\link{runAlgorithm}} +#' +#' @export +getBestConvexCombination = function(r) +{ + X = r$data[,r$experts] + Y = as.double(r$data[,"Measure"]) + indices = getNoNAindices(X) & !is.na(Y) + X = as.matrix(X[indices,]) + Y = Y[indices] + + K = length(r$experts) + return (constrOptim(theta=rep(1.0/K,K), + method="Nelder-Mead", #TODO: others not better... why? + f=function(p){return(sum((X%*%p-Y)^2))}, + grad=NULL, #function(p){return(2.*t(X)%*%(X%*%p-Y))}, + ui=rbind(rep(1.,K),rep(-1.,K),diag(K)), ci=c(0.99999,-1.00001, rep(0.,K)), + control=list(ndeps=1e-3,maxit=10000))$par) +} + +#' @title Get best linear combination +#' +#' @description Return the weights u minimizing the quadratic error ||r$X*u-r$Y||^2 +#' +#' @param r Output of \code{\link{runAlgorithm}} +#' +#' @export +getBestLinearCombination = function(r) +{ + X = r$data[,r$experts] + Y = r$data[,"Measure"] + indices = getNoNAindices(X) & !is.na(Y) + X = as.matrix(X[indices,]) + Y = Y[indices] + + return (mpPsInv(X) %*% Y) +} + +#' @title Get statistical indicators +#' +#' @description Return respectively the TS, FA, MA, RMSE, EV indicators in a list. +#' +#' @param r Output of \code{\link{runAlgorithm}} +#' @param thresh Threshold to compute alerts indicators. +#' @param station Name or index of the station to consider. 
Default: the first one +#' @param noNA TRUE to show only errors associated with full lines (old behavior) +#' +#' @export +getIndicators = function(r, thresh, station=1, noNA=TRUE) +{ + if (is.character(station)) + station = match(station, r$stations) + + #TODO: duplicated block (same in plotCloud()) + XY = subset(r$data, subset = (Station == station), select = c(r$experts,"Measure","Prediction")) + Y = XY[,"Measure"] + hatY = XY[,"Prediction"] + indices = !is.na(Y) & !is.na(hatY) + if (noNA) + { + X = XY[,names(XY) %in% r$experts] + indices = indices & getNoNAindices(X) + } + Y = Y[indices] + hatY = hatY[indices] + + RMSE = round(sqrt(sum((Y - hatY)^2) / length(Y)),2) + EV = round(1 - var(Y-hatY) / var(Y), 2) + A = sum(hatY >= thresh & Y >= thresh, na.rm=TRUE) #right alarm + B = sum(hatY >= thresh & Y < thresh, na.rm=TRUE) #false alarm + C = sum(hatY < thresh & Y >= thresh, na.rm=TRUE) #missed alert + TS = round(A/(A+B+C),2) + FA = B/(A+B) + MA = C/(A+C) + return (list("TS"=TS, "FA"=FA, "MA"=MA, "RMSE"=RMSE, "EV"=EV)) +} diff --git a/pkg/R/z_runAlgorithm.R b/pkg/R/z_runAlgorithm.R new file mode 100644 index 0000000..ed75454 --- /dev/null +++ b/pkg/R/z_runAlgorithm.R @@ -0,0 +1,72 @@ +#' @include b_Algorithm.R + +algoNameDictionary = list( + ew = "ExponentialWeights", + kn = "KnearestNeighbors", + ga = "GeneralizedAdditive", + ml = "MLpoly", + rt = "RegressionTree", + rr = "RidgeRegression", + sv = "SVMclassif" +) + +#' @title Simulate real-time predict +#' +#' @description Run the algorithm coded by \code{shortAlgoName} on data specified by the \code{stations} argument. +#' +#' @param shortAlgoName Short name of the algorithm. +#' \itemize{ +#' \item ew : Exponential Weights +#' \item ga : Generalized Additive Model +#' \item kn : K Nearest Neighbors +#' \item ml : MLpoly +#' \item rt : Regression Tree +#' \item rr : Ridge Regression +#' } +#' @param stations List of stations dataframes to consider. +#' @param experts Vector of experts to consider (names). 
+#' @param ... Additional arguments to be passed to the Algorithm object. +#' +#' @return A list with the following slots +#' \itemize{ +#' \item{data : data frame of all forecasts + measures (may contain NAs) + predictions, with date and station indices.} +#' \item{algo : object of class \code{Algorithm} (or sub-class).} +#' \item{stations : list of dataframes of stations for this run.} +#' \item{experts : character vector of experts for this run.} +#' } +#' +#' @examples +#' data(stations) +#' r = runAlgorithm("ew", list(st[[1]]), c("P","MA3")) +#' plotCurves(r) +#' r2 = runAlgorithm("ml", st[c(1,2)], c("MA3","MA10")) +#' plotError(r2) +#' @export +runAlgorithm = function(shortAlgoName, stations, experts, ...) +{ + #very basic input checks + if (! shortAlgoName %in% names(algoNameDictionary)) + stop("Unknown algorithm:") + experts = unique(experts) + + #get data == ordered date indices + forecasts + measures + stations indices (would be DB in prod) + oracleData = getData(stations, experts) + + #simulate incremental forecasts acquisition + prediction + get measure + algoData = as.data.frame(matrix(nrow=0, ncol=ncol(oracleData))) + names(algoData) = names(oracleData) + algorithm = new(algoNameDictionary[[shortAlgoName]], data=algoData, ...) 
+ predictions = c() + T = oracleData[nrow(oracleData),"Date"] + for (t in 1:T) + { + #NOTE: bet that subset extract rows in the order they appear + tData = subset(oracleData, subset = (Date==t)) + algorithm$inputNextForecasts(tData[,names(tData) != "Measure"]) + predictions = c(predictions, algorithm$predict_withNA()) + algorithm$inputNextObservations(tData[,"Measure"]) + } + + oracleData = cbind(oracleData, Prediction = predictions) + return (list(data = oracleData, algo = algorithm, experts = experts, stations = stations)) +} diff --git a/pkg/R/z_util.R b/pkg/R/z_util.R new file mode 100644 index 0000000..996a5f8 --- /dev/null +++ b/pkg/R/z_util.R @@ -0,0 +1,49 @@ +#Maximum size of stored data to predict next PM10 +MAX_HISTORY = 10000 + +#Default lambda value (when too few data) +LAMBDA = 2. + +#Maximum error to keep a line in (incremental) data +MAX_ERROR = 20. + +#Turn a "vector" into 1D matrix if needed (because R auto cast 1D matrices) +matricize = function(x) +{ + if (!is.null(dim(x))) + return (as.matrix(x)) + return (t(as.matrix(x))) +} + +#Moore-Penrose pseudo inverse +mpPsInv = function(M) +{ + epsilon = 1e-10 + s = svd(M) + sd = s$d ; sd[sd < epsilon] = Inf + sd = diag(1.0 / sd, min(nrow(M),ncol(M))) + return (s$v %*% sd %*% t(s$u)) +} + +#Heuristic for k in knn algorithms +getKnn = function(n) +{ + return ( max(1, min(50, ceiling(n^(2./3.)))) ) +} + +#Minimize lambda*||u||^2 + ||Xu - Y||^2 +ridgeSolve = function(X, Y, lambda) +{ + s = svd(X) + deltaDiag = s$d / (s$d^2 + lambda) + deltaDiag[!is.finite(deltaDiag)] = 0.0 + if (length(deltaDiag) > 1) + deltaDiag = diag(deltaDiag) + return (s$v %*% deltaDiag %*% t(s$u) %*% Y) +} + +#Return the indices (of rows, by default) without any NA +getNoNAindices = function(M, margin=1) +{ + return (apply(M, margin, function(z)(!any(is.na(z))))) +} diff --git a/pkg/data/stations.RData b/pkg/data/stations.RData new file mode 100644 index 
0000000000000000000000000000000000000000..00cc6d129540abdcedc2483c27b935ec0d4d7ef3 GIT binary patch literal 6874 zcmb2|=3oE==C{$65x&{C{h3b*oU}>hnRv$eQP~vB;Ab111g{24o-UXWRK5P|n!Vmn z6b>7v_PMnAr7Tl%(sY#yX|wR-yubO|=R2=!-dC=FU$y@A-+z|%Pj-FU@^0_-sPhH< z`+t8=p7i(2JhjWdU%q_#@|A~&_sbXF7wg-8eOaGX)mC+E)7|U+|L!*YyU@?`B}9u3x8-|2-IKr;~$&ZyULSp@_F0)laiIw>Z4D*$$vca@7e{&-O}^kpM2|fCT_~R zGsl$A#EBeu`*_D)B}a3yX@=E_LYu|i3U;@%O0ySM$EN9@Nc`3$VfMbsA#eVpP9F*_wMwh({E0O-z?o^xVz4vGxq-dlgFwn&(xit z6?;GS=(_iR&bbSx?YEs7d;iwab@}@qF6P~QKjQ4V$?G=0UUDmEk6r7{^=}iece1{* z+Q=?l|2_R%%j-8*3-xyID{AVU-oDAuuD4hG?oEmCbH_ewnH}4f_V}i8PS3{c$@TwK zH%p$4fA#3Kcm2xm`+M5H>D6(+{4w|6i%EBWegD*J`pq$e*LXe0R~Ci0ovV{yx!;)e z?$@@X8}bzFUYb8Uw(z^c+IRPI&QG1H@byzf_`HjJExBu*d%kXVu-dGzdj6+6$L79x zgUv?`Z-hDI|1S_}Df?^o<=Y;{=<>anqo*un`g-`=)-cH)hLbvfroVa{-w+kEIVCI!nrQPIR%*&$ZfvqzU^O@PZ~ume3Nu{(Bt8vfP+aHwv;XcS zMUz~K+CugVCEhnuo^6$F`4Y|A9(C#Yx}BRIUAxJ6>bJ`2v|Zm2~nq6Iv}ichXr?yAhXQ~ml{<8R?Tee23HHCU!yJ7;box!fi^+jy4W zo#;yYd+Viq{(rl>VvV^*X-seBy_E_tcIUNxmUfU<-k+z{u}{48v*_U?H|N^LH$PaO z>Z_&R!&3O~N6u|l?QWIFRt$T%=6ZMp)l_NU;&AO0RM zmVftu8|+{ucxQ3)!z=~smw8od*f*-z#%%q2YR}0S=eu_KTPIC_%YIPa#H#-2PtV^^ zuJ<&5IQO;WpSI44OoqpG%wIQ4_BO5g;#j+5=D$yThaG*YKJ-XUDL(#w;rz#S9^OwE zs&ef8_HTn+dIuO!+?-c3o43)WUc0;W27BV?9r2$l zF3!nY5Iy}%=e}om^8#+~l%8V~&oOD%&y6Ot>-MZ;GBceZ_H#}av((99 zcaX34^>;I!phxZ;HqWC)oFBhs=$8K)bNj7%vAI*kYR<}Y&fA$f7iT{8czXZywfpV? 
zJJ|xiZ)Ye?J^1}=<3)v(`slYc^=(4^PcCS`s$g??)a(CV_7CF{?$!Ivt!_yjYHPf@ z@=3ylU1C3~&GH-`e+?*qz94nMtxHdC=B(EVNS`%dCO+M`BX;}$poxDNIsz|0xORLW zqvX~)k4MSXmiaw%=GHNGv|pZm@@RI$6*E`w@Y(q{gu9}WUa$?bTDGI8*UKuq(EOz_ydIu|W|L50dN$oQ=JJ-*)a&Fv=tgw86bkq24F&WDD|DXNq z+qdU_k-~)<9jSYBq?l*f&7WSf`;LgmhfBhL_vkf+)b4n%|8k~+*@FJ-cNh}q{kBq` zJ#Ry4;myX~&(|nMn8Z&ue#Ek3_i^7E=^IifY_8v_Oy1mZ!+3vibl$Dp@R!`HHnKCl z|1HchcenN7hrdEO{@z(~ns2uDr{d$*x<46JB0F-c=N#L9bKAS^vHKgVdMs-{1_xNb zS@JoTdzD}qqh2SQ!khi>|L?G0T|LeCebwbX8B=}ozcUBsn7sV7V4q2Sztl{T#|%-2 z&zdivcaml0my0W+CYJo?fBNc`jHhN??e7oSJN0^I7Wc2)efzlG7nY2DDr!gf7CFj> zeelm}KNaAbE&syd0v9U~#j{2+@KmFD5P3`<^ z`ceCrx4YGC%wRe4ChTt9R|c1t7jIoX$M5ho-!gO4=Rk$vNtyrrSvGm?-#xd?+QW1E z!e>kE8@~K1UHx-Q(1fl>eHPocv-vuAzcyiReD;0ow^UQJrY{?pJn;L=GC^vt)}Er5 zA`^-yUG3Q9r66q|nYMK|=o;;>z7@$lDqKe@b2)5 z4x3KTek%Kh$HjVktk6u3OFcURuQ_o|h?1A8TK%gzA^vpI`-Q(1)&>T;e`b5R>v;9q zIbjN4x2AlwaN=ub+a#}3Hb0f^$*lf|Rd*^lTfQv`U42kc;Dn>^W4n_3H7jS= zaWGvfF7UZ|zSz;L^LtKC7-P`&4eOfArvF*K=j;7Ghpx*;n_RnApZT7@sid5xsPxFC z3yHTIYTgtr+MLhgv23G$-_%PL`zEL4{XRPB8Lv9OK`(pIwhuYKD>pan+4|9X%Nw%^ z8_l)#x7=o5^m+4)DGaloFUpdiU)Fi;Vaa6Omur7B8QwKFdh^Re;qs5IM?cN}zWvgl zX~Nc4zt3+|=)3>5`rkGNfiQiZ#hpRxy6tVMC7T8QZz{N>t;=y}sVTErkbSPe<@%Y$ z799KM-Fg$`_d4irm$S8gTGNAVKND^r^mTYMJxN^hpK!;Yp#4Y!csdhxvEYlbVZud+i06=zV?} z5Pwhny`FLVWvi4wwWpUkI+&@}+UrF>b47dgvGWxZIbPj0P}sG`eudhy=>7JNpTAtm&h)lY zYp}{aIbXXhp84Ul-n`nR`|oWR-+7Z^^X9F|feTVsrq|!wYirDW<^9~afFo>&c6`=V z@?my}6`8j6-FAD&7t?m1u+W|JXaeWlZ)x!i8v~y!&h>rdcxS>(*j@0aL|6HBV zbm`#KLldJ){_1Ne*zu=i{XeN_9h@Rl^uAyF@6%d~E1%B#DXe0hE2EuR`y->U#8IjC zpS-O1`PbIXAN2mjz7{{KFsE{x<*U+K0lRDWo|s+aW?9cIDrmXa;m54+r(Sv+tuXCW z)ZO3KSXJ;}*VwI|?d#X9*{7m)Cp=DHfBLn)f||V7%PrqkJB0TX&-;CyX<<@^&+;3~ z6_%u#ZI#_2(bDtl@?)LMe^oOt)o-)@JMT8%?Z)nN7b^J8Oud(s7JWIo{Fy>v&yt@Cu(g|J_5k*WP>Y z+}~}(YIwRh|8`eN)&GcDWous6)Mi93s;fMsyYX>btb@=!xym2yb{@aNR7DT(YhLn| zQ~!)5@1+B*@nUt{tS1-c`2D_HQY*i;oZWvmPte-|gFzbxCrRJxsg*FJ~4v%m6YP51RaGMCqZBl`1HrXqoN`(IyJ zv{IL&{+-l@wMoJqzqB8{U^2)L{n{S-H!-w6;;Q_=%fFo!7U}i<_iL-%S)u1*bmm3; 
z34?FdTRH!TGap~L_e1gVg|<0A-kZvZ@A{$lJ@Uu)=Tg5b)^9nMetn_+FQcvBA0OxE z|K|EvzFzr(kJyeMJ0o`gybvwH8@|+X!$z;-gvCdm#V)lpsGj=lz?=UWXCL0*bmzaW z@rN&KmSp?fpK;D`ulCD%%jN?>O`!>u;tUO%Q z{*?c;;xu=i=|5z2mPf>Gyp#0vXi?8o$?c zZjt7&7wA*4aoo}Lf%PHJA?X%tj(Pz;MVmvv1??Vw7l>25GmxnP}=jB7<#f#~!00pc^@kielOb+zGv^@GXC54IqXxv zO?c10t^bB!Qg!EBwQcjgtWP~=y&d|cI48Zb`uF!eU-P%jF46aL?=PzIU-(<(SO4Wk z4}PWCjxUOrIz9B2?sj}pzBG}`_T)>iCHkUPlP}Nax_k1a{}Shka<02XUIs4-7Ok3k zd9BBOCE3*%HqTiX`XbuzxzgV5lJ!pii>xGGCNAloXy;}n`7(9M?1^<@FEW-aw~X!m zVtVPON51mi-Y@2t9&*|Cl~^u6Khe*-0Yu)n_n@gWPt}EB}m)KqU?eSgvi~V%w z06B-93{RP-2nWPD>|}h(K1DfTp2JS3iTXR{G=5?`C96@dQN%IbJwVU#C(BcjDd7Qm zjz3wSN=!))Sm*eY?WxR^@_=`aKiQuuOlc3$bNb2gRAtKafIO$4oKH2TEEn0?bvj%l zec_(gBJor4T{6d>8csPKVCQVf`_$Am@9`(;Q-vDm7sj-Inyd7@u#*3&-IV_#mJ?1l zYlttZ=_pb<)vaN^=ubzH%Bjg3;aZ2+vOH|-ONz1n#CMuUEarvIi^j=~(@mffRpI^0>iN>WxbJSdKg)9u581~d;wbSk* zDY4ARC7Bbqxk`mEES>nw^_F;M!jjgBYHqh?Tt4BUt!-j_>D-()x2ULvD<^JqlakKN zU9!{iZLf*NrAHoxZ_HVJf^oQ&1`Ba7}tRgcR zQkka+282B@Vm`&E;jFQV<&=m<^n#v)sk~DxMQ$>u3g~q&@L6<1O=EFEr0c}U9dDY9 zcupBQmfAQg2FX{FYvZf0xJGG2z0Hd=7BcZ=f7WA>$agDu=Po9tYY*RVtTQn-(BN6p z^VsOolupa873YE;ELzyQhT%j*o5;?2trwmNHZT6Yj^ zymYztifNa&ztCsj^yf2Ag$-hiZKFGN!FfaVW|D{JWOVe|n?W+_QaK3jNk5uU0->KActl~9Sb_TOJmjJiIlDJ*^t`48< zUNrQ5F~0v_Y;#G2*H*<s$(?6gvyn6DB}MYk`qUp%(=bfsir)2nk) zvKvB|9y?<7W-s$B_xTbIb@!d!d=$*)s3}({ESy`yn^Ex7cV^~dhDA=B@1Nj2F;Doj zCd-fPCuL8o|Jc>=*@@q8et)#`!JdM77Wpyk>$tCXzCKoap!Q(xq1wZ>M{1AO9;-cG ze4zMX@uA|w#Yc*d79T5iw|Ced@TcW>?<4m+>o1(Y@Lt6JNnK}MS6z2qPhD?aU)}m6 zjQ!j_!adSG%01dW#y!?O&OP2e!9CHceU&lEbCT~Q%N^L$P|5U}?U~>i?HTC??-S%6 zRla;*`TBn6*8KH<=dj#bV0OpmX%}zqgvHNh?B6xTP5RpDM-6tmj;o6E^FQbFIjq{~ z7B=V0v7}g@zyCV9dnDdO9^&v=e0!_X{)mPI%a7BZY=~3yax<%nmu+cTurl39t|_Xt zGVZyOzrbDFFrKQpY?lOs=f-YQtj z-^<;n4={K1p1uChU8a#|_P1YOb{T%MQS#&6&9I~*rEH>V?uXcDf0-}VclAR9*026` z`)O%U+?`|MOS)g~dY}Eu_p)i$mWfN|eomQiC(FiLYOh4cJ^owq7ut*pf{V3F*;;Bs zehW`d3b;DCRoGgfPwvHaO;gDeYxJ%53vaer{N3j!=OLrnuU_q_5LhL->wErAzAJJI zmreU(ipwG6k=yXTBse1FRns3cs!r~m2 
z$>N{>97+#}cKQEd^G(g)cl)f$e>MEd&PcW@6sYe{{GE5p_1zMOPxp3e=sgmgkbFGJ zUe|=}>rcKn_ruGrcHX$R*Pf&H`GqYPMLir&PYX26ohtbDhwUEz`OW8#et%G1D0gbl z51Xl#pGzO?Dww+??+*JN@jL2w%rsfhp2m|J!Q*wtX!R9 zsJ&29R(sb`GkexW|EF2AUH5OcNK5wo)1xEcd~BY7b`F2ah2GVU-7EK7-uQ84&Q+Ep z*240Ao0nP?cs=Qs+?vhUne%=2S_83;s`D+D%h(%bJl^F9`CpeT4cokn@o4zwiH3mhDoTV7|-Sk2n55oA2Nf9I42( z=}PK$->sYuY1Y*dre;hkMau=nUoc#Xt_j{=|MrDgeKiVgB<@>LDrLz)voMzPTzdf;4K`!!j>I?(sBj(Jd zPQ3yDO6tq2IazuW3m@*2R?xXLBcx<`o!Gak2Mgz#@Sp4be7?s2vDKM1=lI$`E1avy zG}$p*yTgvvv3(<&9(y?Sqxf9hqT=tKBdBzSmV= zXXW^K&EBQ=7cEf!v`_r)w<_*O=OvSc)qng^Rr!$pC+4T!{`g9{db@r8mG<-b&+$L+ z{`^1Nwx)JRT#WsF{yXw_^zYc;@xK#)C;v{p4Zn@NjsDX=A8bC>{H)Y|e*UlHNB%G3 zU*x~2f6@P9{>A=_`@8vF`-}d&*1P==`y=~z{_$oZc?o@+x+xWhejKVe{Nr%NksnV! zt~#RMpWc(-Q?B~2((b^Y1y8uo$m`5cxVP#R|LR|dSN+?t%Kp#t@9i^||8sO|y7&D* N +#include + +void ew_predict_noNA(double* X, double* Y, int* n_, int* K_, double* alpha_, int* grad_, double* weight) +{ + int K = *K_; + int n = *n_; + double alpha = *alpha_; + int grad = *grad_; + + //at least two experts to combine: various inits + double invMaxError = 1. / 50; //TODO: magic number + double logK = log(K); + double initWeight = 1. / K; + for (int i=0; i 30) || (X[t*K+i] > 30 && Y[t] <= 30)) + error[i] = 1.0; + else + error[i] = 0.0; +*/ + } + } + for (int i=0; i 0 (all weights are 0 or more, sum > 0) + for (int i=0; i +#include + +void ml_predict_noNA(double* X, double* Y, int* n_, int* K_, double* alpha_, int* grad_, double* weight) +{ + int K = *K_; + int n = *n_; + double alpha = *alpha_; + int grad = *grad_; + + //at least two experts to combine: various inits + double initWeight = 1. / K; + for (int i=0; i 0. ? eta * regret[i] : 0.; + } + + double sumWeight = 0.0; + for (int i=0; i 0 (all weights are 0 or more, sum > 0) + for (int i=0; i