From: Benjamin Auder <benjamin.auder@somewhere>
Date: Thu, 27 Apr 2017 14:37:18 +0000 (+0200)
Subject: TODO: comprendre pourquoi pas les mêmes voisins
X-Git-Url: https://git.auder.net/doc/%7B%7B%20asset%28%27mixstore/current/pieces/%7B%7B%20pkg.url%20%7D%7D?a=commitdiff_plain;h=dca259e4e9c0235cb113b329c27157ac59d7242b;p=talweg.git

TODO: comprendre pourquoi pas les mêmes voisins
---

diff --git a/Etude_En_Cours_R_E_bis.R b/Etude_En_Cours_R_E_bis.R
index aa0d490..457cf79 100644
--- a/Etude_En_Cours_R_E_bis.R
+++ b/Etude_En_Cours_R_E_bis.R
@@ -1,5 +1,7 @@
+predi <- function(ij)
+{
 #setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno")
-rm(list=ls())
+#rm(list=ls())
 
 # Lecture des données: pm = dataframe 2 colonnes, date-time puis PM10 horaire
 pm  =  read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=",",dec=".",header=T)
@@ -48,7 +50,7 @@ ResDates = NULL
 
 nbvois=10
 j=1 # numéro de semaine
-ij=6 # numéro du jour (0 = lundi)
+#ij=6 # numéro du jour (0 = lundi)
 
 Err24 = NULL
 ErrPrev = NULL
@@ -79,6 +81,18 @@ Kvois = NULL
 	large = 1
 	bornes = mean(dataj[25:48]) + c(-large,large)
 	indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+	if (sum(indcond) < 10)
+	{
+		large = 2
+		bornes = mean(dataj[25:48]) + c(-large,large)
+		indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+	}
+	while (sum(indcond) < 10)
+	{
+		large = large + 3
+		bornes = mean(dataj[25:48]) + c(-large,large)
+		indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+	}
 	data = data[indcond,] #pollution du 2eme jour == pollution du jour courant +/- 1
 	varexp = varexp[indcond,]
 
@@ -102,6 +116,8 @@ Kvois = NULL
 	} else {
 		erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48]))
 	}
+
+	list(serie=dataj, prev=JourMoy, err=erreurPrev, line=(nl+ij), neighbs=ind, dates=data[ind,1])
 #	erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48]))
 #	#png(NomFile)
 #	matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind), 
@@ -149,3 +165,4 @@ Kvois = NULL
 ## xx = dev.off()
 #
 ##length(D)
+}
diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R
index 02536eb..60eb7b7 100644
--- a/pkg/R/F_Neighbors.R
+++ b/pkg/R/F_Neighbors.R
@@ -61,8 +61,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 			if (!opera)
 				tdays = setdiff(tdays, today) #always exclude current day
 
-			# Shortcut if window is known
-			if (hasArg("window"))
+			# Shortcut if window is known or local==TRUE && simtype==none
+			if (hasArg("window") || (local && simtype=="none"))
 			{
 				return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
 					local, list(...)$window, simtype, opera, TRUE) )
@@ -129,8 +129,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 
 			if (local)
 			{
-				# TODO: 60 == magic number
-				tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
+				# limit=Inf to not censor any day (TODO: finite limit? 60?)
+				tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
 					days_in=tdays_cut, operational=opera)
 #				if (length(tdays) <= 1)
 #					return (NA)
@@ -146,6 +146,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 					}
 					return ( data$getSerie(tdays[1])[predict_from:horizon] )
 				}
+				max_neighbs = 12 #TODO: 12 = arbitrary number
+				if (length(tdays) > max_neighbs)
+				{
+					distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
+					ordering <- order(distances2)
+					tdays <- tdays[ ordering[1:max_neighbs] ]
+				}
 			}
 			else
 				tdays = tdays_cut #no conditioning
@@ -156,19 +163,9 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 				window_endo = ifelse(simtype=="mix", window[1], window)
 
 				# Distances from last observed day to selected days in the past
+				# TODO: redundant computation if local==TRUE
 				distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
 
-				if (local)
-				{
-					max_neighbs = 12 #TODO: 12 = arbitrary number
-					if (length(distances2) > max_neighbs)
-					{
-						ordering <- order(distances2)
-						tdays <- tdays[ ordering[1:max_neighbs] ]
-						distances2 <- distances2[ ordering[1:max_neighbs] ]
-					}
-				}
-
 				simils_endo <- .computeSimils(distances2, window_endo)
 			}
 
diff --git a/pkg/R/utils.R b/pkg/R/utils.R
index bb76996..bc80030 100644
--- a/pkg/R/utils.R
+++ b/pkg/R/utils.R
@@ -78,6 +78,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season,
 		j = index + 1
 	while (length(days) < min( limit, ifelse(is.null(days_in),Inf,length(days_in)) ))
 	{
+		if (i < 1 && j > data$getSize())
+			break
 		if (i >= 1)
 		{
 			dt = as.POSIXlt(data$getTime(i)[1])
@@ -111,6 +113,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season,
 #
 .isSameSeason = function(month, month_ref)
 {
+	if (month_ref == 3) #TODO: same as Bruno (but weird)
+		return (month %in% c(2,3,4,9,10))
 	if (month_ref %in% c(11,12,1,2)) #~= mid-polluted
 		return (month %in% c(11,12,1,2))
 	if (month_ref %in% c(3,4,9,10)) #~= high-polluted