cleaning - fix getSimilarDaysIndices ; to finish
authorBenjamin Auder <benjamin.auder@somewhere>
Tue, 28 Mar 2017 16:53:04 +0000 (18:53 +0200)
committerBenjamin Auder <benjamin.auder@somewhere>
Tue, 28 Mar 2017 16:53:04 +0000 (18:53 +0200)
pkg/R/F_Neighbors2.R
pkg/R/computeForecast.R
pkg/R/utils.R
reports/report.ipynb

index fb63e40..83ef453 100644 (file)
@@ -9,15 +9,15 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
        inherit = Forecaster,
 
        public = list(
-               predictSerie = function(data, today, memory, horizon, ...)
-               {
-                       # Parameters (potentially) computed during shape prediction stage
-                       predicted_shape = self$predictShape(data, today, memory, horizon, ...)
-#                      predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
-                       # Predicted shape is aligned it on the end of current day + jump
-#                      predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
-                       predicted_shape
-               },
+#              predictSerie = function(data, today, memory, horizon, ...)
+#              {
+#                      # Parameters (potentially) computed during shape prediction stage
+#                      predicted_shape = self$predictShape(data, today, memory, horizon, ...)
+##                     predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...)
+#                      # Predicted shape is aligned it on the end of current day + jump
+##                     predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta
+#                      predicted_shape
+#              },
                predictShape = function(data, today, memory, horizon, ...)
                {
                        # (re)initialize computed parameters
@@ -101,7 +101,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                {
                        fdays = fdays[ fdays < today ]
                        # TODO: 3 = magic number
-                       if (length(fdays) < 3)
+                       if (length(fdays) < 1)
                                return (NA)
 
                        # Neighbors: days in "same season"
@@ -110,14 +110,26 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                        levelToday = data$getLevel(today)
                        distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday))
                        same_pollution = (distances <= 2)
-                       if (sum(same_pollution) < 3) #TODO: 3 == magic number
+                       if (sum(same_pollution) < 1) #TODO: 3 == magic number
                        {
                                same_pollution = (distances <= 5)
-                               if (sum(same_pollution) < 3)
+                               if (sum(same_pollution) < 1)
                                        return (NA)
                        }
                        indices = indices[same_pollution]
 
+                       #TODO: we shouldn't need that block
+                       if (length(indices) == 1)
+                       {
+                               if (final_call)
+                               {
+                                       private$.params$weights <- 1
+                                       private$.params$indices <- indices
+                                       private$.params$window <- 1
+                               }
+                               return ( data$getSerie(indices[1])[1:horizon] ) #what else?!
+                       }
+
                        if (simtype != "exo")
                        {
                                h_endo = ifelse(simtype=="mix", h[1], h)
@@ -159,7 +171,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                                sigma = cov(M) #NOTE: robust covariance is way too slow
 #                              sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed?
                                sigma_inv = MASS::ginv(sigma)
-#if (final_call) browser()
+
                                # Distances from last observed day to days in the past
                                distances2 = sapply(seq_along(indices), function(i) {
                                        delta = M[1,] - M[i+1,]
@@ -200,7 +212,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster",
                        if (final_call)
                        {
                                private$.params$weights <- similarities
-                               private$.params$indices <- fdays
+                               private$.params$indices <- indices
                                private$.params$window <-
                                        if (simtype=="endo")
                                                h_endo
index 3537e8a..bd19b8d 100644 (file)
@@ -59,14 +59,26 @@ computeForecast = function(data, indices, forecaster, pjump,
 #oo = forecaster$predictSerie(data, integer_indices[1], memory, horizon, ...)
 #browser()
 
-       library(parallel)
-       ppp <- parallel::mclapply(seq_along(integer_indices), function(i) {
-               list(
-                       "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
-                       "params"= forecaster$getParameters(),
-                       "index" = integer_indices[i] )
-               }, mc.cores=3)
-
+       parll=TRUE #FALSE
+       if (parll)
+       {
+               library(parallel)
+               ppp <- parallel::mclapply(seq_along(integer_indices), function(i) {
+                       list(
+                               "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
+                               "params"= forecaster$getParameters(),
+                               "index" = integer_indices[i] )
+                       }, mc.cores=3)
+       }
+       else
+       {
+               ppp <- lapply(seq_along(integer_indices), function(i) {
+                       list(
+                               "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...),
+                               "params"= forecaster$getParameters(),
+                               "index" = integer_indices[i] )
+                       })
+       }
 #browser()
 
 for (i in seq_along(integer_indices))
index f79c300..20f396b 100644 (file)
@@ -65,7 +65,7 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL)
 {
        index = dateIndexToInteger(index)
 
-       #TODO: mardi similaire à lundi mercredi jeudi aussi ...etc
+       #TODO: mardi similaire à lundi mercredi jeudi aussi ...etc ==> "isSimilarDay()..."
        if (!same_season)
        {
                #take all similar days in recent past
@@ -73,36 +73,36 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL)
                return ( rep(index,nb_days) - 7*seq_len(nb_days) )
        }
 
-
-       #TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP
        #Look for similar days in similar season
+       nb_days = min( (index-1) %/% 7, limit)
+       i = index - 7
        days = c()
-       i = index
+       month_ref = as.POSIXlt(data$getTime(index)[1])$mon + 1
        while (i >= 1 && length(days) < limit)
        {
-               if (i < index)
-               {
+               if (isSameSeason(as.POSIXlt(data$getTime(i)[1])$mon + 1, month_ref))
                        days = c(days, i)
-                       #look in the "future of the past"
-                       for (j in 1:4)
-                               days = c(days, i+7*j)
-               }
-               #...and in the "past of the past"
-               for (j in 1:4)
-               {
-                       if (i - 7*j >= 1)
-                               days = c(days, i-7*j)
-               }
-               # TODO: exact computation instead of -364
-               # 364 = closest multiple of 7 to 365 - drift along the years... but not so many years so OK
-               i = i - 364
-
-
+               i = i-7
        }
+       return ( days )
+}
 
-       return ( days[1:min(limit,length(days))] )
+#TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP
+isSameSeason = function(month, month_ref)
+{
+       if (month_ref %in% c(11,12,1,2))
+               return (month %in% c(11,12,1,2))
+       if (month_ref %in% c(3,4,9,10))
+               return (month %in% c(3,4,9,10))
+       return (month %in% c(5,6,7,8))
 }
 
+#TODO:
+#distinction lun-jeudi, puis ven, sam, dim
+#isSameDay = function(day, day_ref)
+#{
+#      if (day_ref == 
+
 #' getNoNA2
 #'
 #' Get indices in data of no-NA series followed by no-NA, within [first,last] range.
index 899fbf6..3dac8ec 100644 (file)
@@ -59,8 +59,6 @@
     "editable": true
    },
    "source": [
-    "\n",
-    "\n",
     "<h2 style=\"color:blue;font-size:2em\">Pollution par chauffage</h2>"
    ]
   },
    "outputs": [],
    "source": [
     "reload(\"../pkg\")\n",
-    "p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
-    "p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
+    "#p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n",
+    "#p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n",
+    "#p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n",
     "p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n",
     "p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n",
-    "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")\n"
+    "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "getSimilarDaysIndices(1000,10,TRUE,data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "as.POSIXlt(data$getTime(1000)[1])"
    ]
   },
   {
    },
    "outputs": [],
    "source": [
-    "e_nn_exo = computeError(data, p_nn_exo, 3)\n",
-    "e_nn_mix = computeError(data, p_nn_mix, 3)\n",
-    "e_az = computeError(data, p_az, 3)\n",
-    "e_pz = computeError(data, p_pz, 3)\n",
-    "options(repr.plot.width=9, repr.plot.height=7)\n",
-    "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n",
-    "\n",
-    "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n",
-    "\n",
-    "i_np = which.min(e_nn_exo$abs$indices)\n",
-    "i_p = which.max(e_nn_exo$abs$indices)"
+    "plotError(list(e4,e1,e2,e3, e5,e6), cols=c(1,2,3,4,5,6))"
    ]
   },
   {