+
+# getConstrainedNeighbs
+#
+# Get indices of neighbors of similar pollution level (among same season + day type).
+#
+# @param today Index of current day
+# @param data Object of class Data
+# @param tdays Current set of "second days" (no-NA pairs)
+# @param min_neighbs Minimum number of points in a neighborhood
+# @param max_neighbs Maximum number of points in a neighborhood
+#
+.getConstrainedNeighbs = function(today, data, tdays, min_neighbs=10)
+{
+ levelToday = data$getLevelHat(today)
+ distances = sapply( tdays, function(i) abs(data$getLevel(i) - levelToday) )
+ #TODO: 1, +1, +3 : magic numbers
+ dist_thresh = 1
+ min_neighbs = min(min_neighbs,length(tdays))
+ repeat
+ {
+ same_pollution = (distances <= dist_thresh)
+ nb_neighbs = sum(same_pollution)
+ if (nb_neighbs >= min_neighbs) #will eventually happen
+ break
+ dist_thresh = dist_thresh + ifelse(dist_thresh>1,3,1)
+ }
+ tdays[same_pollution]
+}
+
+# compute similarities
+#
+# Apply the gaussian kernel on computed squared distances.
+#
+# @param distances2 Squared distances
+# @param window Window parameter for the kernel
+#
+.computeSimils <- function(distances2, window)
+{
+ sd_dist = sd(distances2)
+ if (sd_dist < .25 * sqrt(.Machine$double.eps))
+ {
+# warning("All computed distances are very close: stdev too small")
+ sd_dist = 1 #mostly for tests... FIXME:
+ }
+ exp(-distances2/(sd_dist*window^2))
+}
+
+.computeDistsEndo <- function(data, today, tdays, predict_from)
+{
+ lastSerie = c( data$getSerie(today-1),
+ data$getSerie(today)[if (predict_from>=2) 1:(predict_from-1) else c()] )
+ sapply(tdays, function(i) {
+ delta = lastSerie - c(data$getSerie(i-1),
+ data$getSerie(i)[if (predict_from>=2) 1:(predict_from-1) else c()])
+ sqrt(mean(delta^2))
+ })
+}
+
+.computeDistsExo <- function(data, today, tdays)
+{
+ M = matrix( ncol=1+length(tdays), nrow=1+length(data$getExo(1)) )
+ M[,1] = c( data$getLevelHat(today), as.double(data$getExoHat(today)) )
+ for (i in seq_along(tdays))
+ M[,i+1] = c( data$getLevel(tdays[i]), as.double(data$getExo(tdays[i])) )
+
+ sigma = cov(t(M)) #NOTE: robust covariance is way too slow
+ # TODO: 10 == magic number; more robust way == det, or always ginv()
+ sigma_inv =
+ if (length(tdays) > 10)
+ solve(sigma)
+ else
+ MASS::ginv(sigma)
+
+ # Distances from last observed day to days in the past
+ sapply(seq_along(tdays), function(i) {
+ delta = M[,1] - M[,i+1]
+ delta %*% sigma_inv %*% delta
+ })
+}