X-Git-Url: https://git.auder.net/rpsls.js?a=blobdiff_plain;f=pkg%2FR%2Fb_Algorithm.R;fp=pkg%2FR%2Fb_Algorithm.R;h=3ff9cc9b1e425c5dec6cc3c092d972c675c0e867;hb=357b8be00388d07c04c10d4bf7f503fd947185d2;hp=0000000000000000000000000000000000000000;hpb=a961f8a15492bf4b18d24bc117358d1f412dd078;p=aggexp.git

diff --git a/pkg/R/b_Algorithm.R b/pkg/R/b_Algorithm.R
new file mode 100644
index 0000000..3ff9cc9
--- /dev/null
+++ b/pkg/R/b_Algorithm.R
@@ -0,0 +1,111 @@
+#' @include z_util.R
+
+#' @title Algorithm
+#'
+#' @description Generic class to represent an algorithm
+#'
+#' @field H The window [t-H+1, t] considered for prediction at time step t+1
+#' @field data Data frame of the last H experts forecasts + observations.
+#'
+Algorithm = setRefClass(
+	Class = "Algorithm",
+
+	fields = list(
+		H = "numeric",
+		data = "data.frame"
+	),
+
+	methods = list(
+		initialize = function(...)
+		{
+			"Initialize (generic) Algorithm object"
+
+			callSuper(...)
+			if (length(H) == 0 || H < 1)
+				H <<- Inf
+		},
+		inputNextForecasts = function(x)
+		{
+			"Obtain a new series of vectors of experts forecasts (1 to K)"
+
+			nd = nrow(data)
+			nx = nrow(x)
+			indices = (nd+1):(nd+nx)
+
+			appendedData = as.data.frame(matrix(nrow=nx, ncol=ncol(data), NA))
+			names(appendedData) = names(data)
+			data <<- rbind(data, appendedData)
+			data[indices,names(x)] <<- x
+		},
+		inputNextObservations = function(y)
+		{
+			"Obtain the observations corresponding to last input forecasts"
+
+			#if all experts made a large unilateral error and prediction is very bad, remove data
+			n = nrow(data)
+			lastTime = data[n,"Date"]
+			xy = subset(data, subset=(Date == lastTime))
+			xy[,"Measure"] = y
+			x = xy[,names(xy) != "Measure"]
+			y = xy[,"Measure"]
+			ranges = apply(x-y, 1, range)
+			predictableIndices = (ranges[2,] > -MAX_ERROR & ranges[1,] < MAX_ERROR)
+#			predictableIndices = 1:length(y)
+			data <<- data[1:(n-nrow(xy)),]
+			data <<- rbind(data, xy[predictableIndices,])
+
+			#oldest rows are removed to prevent infinitely growing memory usage,
+			#or to allow a window effect (parameter H)
+			delta = nrow(data) - min(H, MAX_HISTORY)
+			if (delta > 0)
+				data <<- data[-(1:delta),]
+		},
+		predict_withNA = function()
+		{
+			"Predict observations corresponding to the last input forecasts. Potential NAs"
+
+			n = nrow(data)
+			if (data[n,"Date"] == 1)
+			{
+				#no measures added so far
+				return (rep(NA, n))
+			}
+
+			nx = n - nrow(subset(data, subset = (Date == data[n,"Date"])))
+			x = data[(nx+1):n, !names(data) %in% c("Date","Measure","Station")]
+			experts = names(x)
+			prediction = c()
+
+			#extract a maximal submatrix of data without NAs
+
+			iy = getNoNAindices(x, 2)
+			if (!any(iy))
+			{
+				#all columns of x have at least one NA
+				return (rep(NA, n-nx))
+			}
+
+			data_noNA = data[1:nx,c(experts[iy], "Measure")]
+			ix = getNoNAindices(data_noNA)
+			if (!any(ix))
+			{
+				#no full line with NA-pattern similar to x[,iy]
+				return (rep(NA, n-nx))
+			}
+
+			data_noNA = data_noNA[ix,]
+			xiy = as.data.frame(x[,iy])
+			names(xiy) = names(x)[iy]
+			res = predict_noNA(data_noNA, xiy)
+			#basic sanitization: force all values >=0
+			res[res < 0.] = 0.
+			return (res)
+		},
+		predict_noNA = function(XY, x)
+		{
+			"Predict observations corresponding to x. No NAs"
+
+			#empty default implementation: to implement in inherited classes
+		}
+	)
+)