X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=pkg%2FR%2FF_Neighbors2.R;h=7267661eaeda8201810cef5fc3c6a2ab903ca57c;hb=9db234c56c330bb3f652718c5ee1eb16bc1f6fc7;hp=e6adddea7a03cd5d0f21cf6ee84dfc26b40a1ab8;hpb=5c49f6cecd547358b327e9363e62bcc8219e9e33;p=talweg.git

diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R
index e6addde..7267661 100644
--- a/pkg/R/F_Neighbors2.R
+++ b/pkg/R/F_Neighbors2.R
@@ -29,21 +29,18 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 					fdays, today, horizon, list(...)$h_window, kernel, TRUE) )
 			}
 
-
 			# Indices of similar days for cross-validation; TODO: 45 = magic number
-			# TODO: ici faut une sorte de "same_season==TRUE" --> mois similaires epandage
 			sdays = getSimilarDaysIndices(today, limit=45, same_season=FALSE)
 
-
 			# Function to optimize h : h |--> sum of prediction errors on last 45 "similar" days
 			errorOnLastNdays = function(h, kernel)
 			{
 				error = 0
 				nb_jours = 0
-				for (i in intersect(fdays,sdays))
+				for (day in intersect(fdays,sdays))
 				{
 					# mix_strategy is never used here (simtype != "mix"), therefore left blank
-					prediction = private$.predictShapeAux(data, fdays, i, horizon, h, kernel, FALSE)
+					prediction = private$.predictShapeAux(data,fdays,day,horizon,h,kernel,FALSE)
 					if (!is.na(prediction[1]))
 					{
 						nb_jours = nb_jours + 1
@@ -55,9 +52,8 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			}
 
 			# h :: only for endo in this variation
-			h_best_endo = optimize(errorOnLastNdays, c(0,10), kernel=kernel)$minimum
-
-			return (private$.predictShapeAux(data, fdays, today, horizon, h_best, kernel, TRUE))
+			h_best = optimize(errorOnLastNdays, c(0,7), kernel=kernel)$minimum
+			return (private$.predictShapeAux(data,fdays,today,horizon,h_best,kernel,TRUE))
 		}
 	),
 	private = list(
@@ -69,15 +65,30 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 			if (length(fdays) < 3)
 				return (NA)
 
-			# ENDO:: Distances from last observed day to days in the past
-			distances2 = rep(NA, length(fdays))
-			for (i in seq_along(fdays))
+			# Neighbors: "same season" days; distances = |level(day) - level(today)| per neighbor
+			sdays = getSimilarDaysIndices(today, limit=45, same_season=TRUE, data)
+			indices = intersect(fdays,sdays)
+			levelToday = data$getLevel(today)
+			distances = vapply(indices, function(d) abs(data$getLevel(d)-levelToday), numeric(1))
+			same_pollution = (distances <= 2)
+			if (sum(same_pollution) < 3) #TODO: 3 == magic number
 			{
-				delta = data$getSerie(today) - data$getSerie(fdays[i])
-				# Require at least half of non-NA common values to compute the distance
-				if ( !any( is.na(delta) ) )
-					distances2[i] = mean(delta^2)
+				same_pollution = (distances <= 5)
+				if (sum(same_pollution) < 3)
+					return (NA)
 			}
+			indices = indices[same_pollution]
+
+			# Now OK: indices same season, same pollution level
+			# ...........
+
+
+			# ENDO:: mean squared gap between today's serie and each retained neighbor's serie
+			serieToday = data$getSerie(today)
+			distances2 = vapply(indices, function(i) {
+				delta = serieToday - data$getSerie(i)
+				mean(delta^2) # returned value; no write to the not-yet-defined outer distances2
+			}, numeric(1))
 
 			sd_dist = sd(distances2)
 			if (sd_dist < .Machine$double.eps)
@@ -96,51 +107,36 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
 					u
 				}
 
-			# EXOGENS: distances computations are enough
-			# TODO: search among similar concentrations....... at this stage ?!
-			M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
-			M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
-			for (i in seq_along(fdays))
-				M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
-
-			sigma = cov(M) #NOTE: robust covariance is way too slow
-			sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
-
-			# Distances from last observed day to days in the past
-			distances2 = rep(NA, nrow(M)-1)
-			for (i in 2:nrow(M))
-			{
-				delta = M[1,] - M[i,]
-				distances2[i-1] = delta %*% sigma_inv %*% delta
-			}
-
-			ppv <- sort(distances2, index.return=TRUE)$ix[1:10] #..............
-#PPV pour endo ?
-
-			similarities =
-				if (simtype == "exo")
-					simils_exo
-				else if (simtype == "endo")
-					simils_endo
-				else #mix
-					simils_endo * simils_exo
+#			# EXOGENS: distances computations are enough
+#			# TODO: search among similar concentrations....... at this stage ?!
+#			M = matrix( nrow=1+length(fdays), ncol=1+length(data$getExo(today)) )
+#			M[1,] = c( data$getLevel(today), as.double(data$getExo(today)) )
+#			for (i in seq_along(fdays))
+#				M[i+1,] = c( data$getLevel(fdays[i]), as.double(data$getExo(fdays[i])) )
+#
+#			sigma = cov(M) #NOTE: robust covariance is way too slow
+#			sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
+#
+#			# Distances from last observed day to days in the past
+#			distances2 = rep(NA, nrow(M)-1)
+#			for (i in 2:nrow(M))
+#			{
+#				delta = M[1,] - M[i,]
+#				distances2[i-1] = delta %*% sigma_inv %*% delta
+#			}
+
+			similarities = simils_endo
 
 			prediction = rep(0, horizon)
-			for (i in seq_along(fdays))
-				prediction = prediction + similarities[i] * data$getSerie(fdays[i]+1)[1:horizon]
+			for (i in seq_along(indices))
+				prediction = prediction + similarities[i] * data$getSerie(indices[i]+1)[1:horizon]
 			prediction = prediction / sum(similarities, na.rm=TRUE)
 
 			if (final_call)
 			{
 				private$.params$weights <- similarities
-				private$.params$indices <- fdays
-				private$.params$window <-
-					if (simtype=="endo")
-						h_endo
-					else if (simtype=="exo")
-						h_exo
-					else #mix
-						c(h_endo,h_exo)
+				private$.params$indices <- indices
+				private$.params$window <- h
 			}
 
 			return (prediction)