TODO: comprendre pourquoi les voisins ne sont pas les mêmes
author Benjamin Auder <benjamin.auder@somewhere>
Thu, 27 Apr 2017 14:37:18 +0000 (16:37 +0200)
committer Benjamin Auder <benjamin.auder@somewhere>
Thu, 27 Apr 2017 14:37:18 +0000 (16:37 +0200)
Etude_En_Cours_R_E_bis.R
pkg/R/F_Neighbors.R
pkg/R/utils.R

index aa0d490..457cf79 100644 (file)
@@ -1,5 +1,7 @@
+predi <- function(ij)
+{
 #setwd("/Users/bp/Desktop/CONTRATS_AirNormand/2016/RapportFinalBruno")
-rm(list=ls())
+#rm(list=ls())
 
 # Lecture des données: pm = dataframe 2 colonnes, date-time puis PM10 horaire
 pm  =  read.table("DATA/mesures_horaires_hloc_pm10_a_filer.csv",sep=",",dec=".",header=T)
@@ -48,7 +50,7 @@ ResDates = NULL
 
 nbvois=10
 j=1 # numéro de semaine
-ij=6 # numéro du jour (0 = lundi)
+#ij=6 # numéro du jour (0 = lundi)
 
 Err24 = NULL
 ErrPrev = NULL
@@ -79,6 +81,18 @@ Kvois = NULL
        large = 1
        bornes = mean(dataj[25:48]) + c(-large,large)
        indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+       if (sum(indcond) < 10)
+       {
+               large = 2
+               bornes = mean(dataj[25:48]) + c(-large,large)
+               indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+       }
+       while (sum(indcond) < 10)
+       {
+               large = large + 3
+               bornes = mean(dataj[25:48]) + c(-large,large)
+               indcond = varexp[,"PMjour"]>=bornes[1] & varexp[,"PMjour"]<=bornes[2]
+       }
        data = data[indcond,] #pollution du 2eme jour == pollution du jour courant +/- 1
        varexp = varexp[indcond,]
 
@@ -102,6 +116,8 @@ Kvois = NULL
        } else {
                erreurPrev = mean(abs(dataj[(H+1):48] - JourMoy[(H+1):48]))
        }
+
+       list(serie=dataj, prev=JourMoy, err=erreurPrev, line=(nl+ij), neighbs=ind, dates=data[ind,1])
 #      erreur24 = mean(abs(dataj[25:48] - JourMoy[25:48]))
 #      #png(NomFile)
 #      matplot(t(data[ind, 1:48]), type = "l", lwd=1.4, lty=1, col=1:length(ind), 
@@ -149,3 +165,4 @@ Kvois = NULL
 ## xx = dev.off()
 #
 ##length(D)
+}
index 02536eb..60eb7b7 100644 (file)
@@ -61,8 +61,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                        if (!opera)
                                tdays = setdiff(tdays, today) #always exclude current day
 
-                       # Shortcut if window is known
-                       if (hasArg("window"))
+                       # Shortcut if window is known or local==TRUE && simtype==none
+                       if (hasArg("window") || (local && simtype=="none"))
                        {
                                return ( private$.predictShapeAux(data, tdays, today, predict_from, horizon,
                                        local, list(...)$window, simtype, opera, TRUE) )
@@ -129,8 +129,8 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
 
                        if (local)
                        {
-                               # TODO: 60 == magic number
-                               tdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE,
+                               # limit=Inf to not censor any day (TODO: finite limit? 60?)
+                               tdays = getSimilarDaysIndices(today, data, limit=Inf, same_season=TRUE,
                                        days_in=tdays_cut, operational=opera)
 #                              if (length(tdays) <= 1)
 #                                      return (NA)
@@ -146,6 +146,13 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                        }
                                        return ( data$getSerie(tdays[1])[predict_from:horizon] )
                                }
+                               max_neighbs = 12 #TODO: 12 = arbitrary number
+                               if (length(tdays) > max_neighbs)
+                               {
+                                       distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
+                                       ordering <- order(distances2)
+                                       tdays <- tdays[ ordering[1:max_neighbs] ]
+                               }
                        }
                        else
                                tdays = tdays_cut #no conditioning
@@ -156,19 +163,9 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster",
                                window_endo = ifelse(simtype=="mix", window[1], window)
 
                                # Distances from last observed day to selected days in the past
+                               # TODO: redundant computation if local==TRUE
                                distances2 <- .computeDistsEndo(data, today, tdays, predict_from)
 
-                               if (local)
-                               {
-                                       max_neighbs = 12 #TODO: 12 = arbitrary number
-                                       if (length(distances2) > max_neighbs)
-                                       {
-                                               ordering <- order(distances2)
-                                               tdays <- tdays[ ordering[1:max_neighbs] ]
-                                               distances2 <- distances2[ ordering[1:max_neighbs] ]
-                                       }
-                               }
-
                                simils_endo <- .computeSimils(distances2, window_endo)
                        }
 
index bb76996..bc80030 100644 (file)
@@ -78,6 +78,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season,
                j = index + 1
        while (length(days) < min( limit, ifelse(is.null(days_in),Inf,length(days_in)) ))
        {
+               if (i < 1 && j > data$getSize())
+                       break
                if (i >= 1)
                {
                        dt = as.POSIXlt(data$getTime(i)[1])
@@ -111,6 +113,8 @@ getSimilarDaysIndices = function(index, data, limit, same_season,
 #
 .isSameSeason = function(month, month_ref)
 {
+       if (month_ref == 3) #TODO: same as Bruno (but weird)
+               return (month %in% c(2,3,4,9,10))
        if (month_ref %in% c(11,12,1,2)) #~= mid-polluted
                return (month %in% c(11,12,1,2))
        if (month_ref %in% c(3,4,9,10)) #~= high-polluted