From: Benjamin Auder Date: Tue, 28 Mar 2017 16:53:04 +0000 (+0200) Subject: cleaning - fix getSimilarDaysIndices ; to finish X-Git-Url: https://git.auder.net/doc/html/css/scripts/%7B%7B%20asset%28%27mixstore/%7B%7B?a=commitdiff_plain;h=a866acb3c0ae138b22df9dae9ec576b866794417;p=talweg.git cleaning - fix getSimilarDaysIndices ; to finish --- diff --git a/pkg/R/F_Neighbors2.R b/pkg/R/F_Neighbors2.R index fb63e40..83ef453 100644 --- a/pkg/R/F_Neighbors2.R +++ b/pkg/R/F_Neighbors2.R @@ -9,15 +9,15 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", inherit = Forecaster, public = list( - predictSerie = function(data, today, memory, horizon, ...) - { - # Parameters (potentially) computed during shape prediction stage - predicted_shape = self$predictShape(data, today, memory, horizon, ...) -# predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...) - # Predicted shape is aligned it on the end of current day + jump -# predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta - predicted_shape - }, +# predictSerie = function(data, today, memory, horizon, ...) +# { +# # Parameters (potentially) computed during shape prediction stage +# predicted_shape = self$predictShape(data, today, memory, horizon, ...) +## predicted_delta = private$.pjump(data,today,memory,horizon,private$.params,...) +# # Predicted shape is aligned it on the end of current day + jump +## predicted_shape+tail(data$getSerie(today),1)-predicted_shape[1]+predicted_delta +# predicted_shape +# }, predictShape = function(data, today, memory, horizon, ...) { # (re)initialize computed parameters @@ -101,7 +101,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", { fdays = fdays[ fdays < today ] # TODO: 3 = magic number - if (length(fdays) < 3) + if (length(fdays) < 1) return (NA) # Neighbors: days in "same season" @@ -110,14 +110,26 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", levelToday = data$getLevel(today) distances = sapply(indices, function(i) abs(data$getLevel(i)-levelToday)) same_pollution = (distances <= 2) - if (sum(same_pollution) < 3) #TODO: 3 == magic number + if (sum(same_pollution) < 1) #TODO: 3 == magic number { same_pollution = (distances <= 5) - if (sum(same_pollution) < 3) + if (sum(same_pollution) < 1) return (NA) } indices = indices[same_pollution] + #TODO: we shouldn't need that block + if (length(indices) == 1) + { + if (final_call) + { + private$.params$weights <- 1 + private$.params$indices <- indices + private$.params$window <- 1 + } + return ( data$getSerie(indices[1])[1:horizon] ) #what else?! + } + if (simtype != "exo") { h_endo = ifelse(simtype=="mix", h[1], h) @@ -159,7 +171,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", sigma = cov(M) #NOTE: robust covariance is way too slow # sigma_inv = solve(sigma) #TODO: use pseudo-inverse if needed? sigma_inv = MASS::ginv(sigma) -#if (final_call) browser() + # Distances from last observed day to days in the past distances2 = sapply(seq_along(indices), function(i) { delta = M[1,] - M[i+1,] @@ -200,7 +212,7 @@ Neighbors2Forecaster = R6::R6Class("Neighbors2Forecaster", if (final_call) { private$.params$weights <- similarities - private$.params$indices <- fdays + private$.params$indices <- indices private$.params$window <- if (simtype=="endo") h_endo diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 3537e8a..bd19b8d 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -59,14 +59,26 @@ computeForecast = function(data, indices, forecaster, pjump, #oo = forecaster$predictSerie(data, integer_indices[1], memory, horizon, ...) #browser() - library(parallel) - ppp <- parallel::mclapply(seq_along(integer_indices), function(i) { - list( - "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...), - "params"= forecaster$getParameters(), - "index" = integer_indices[i] ) - }, mc.cores=3) - + parll=TRUE #FALSE + if (parll) + { + library(parallel) + ppp <- parallel::mclapply(seq_along(integer_indices), function(i) { + list( + "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...), + "params"= forecaster$getParameters(), + "index" = integer_indices[i] ) + }, mc.cores=3) + } + else + { + ppp <- lapply(seq_along(integer_indices), function(i) { + list( + "forecast" = forecaster$predictSerie(data, integer_indices[i], memory, horizon, ...), + "params"= forecaster$getParameters(), + "index" = integer_indices[i] ) + }) + } #browser() for (i in seq_along(integer_indices)) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index f79c300..20f396b 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -65,7 +65,7 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL) { index = dateIndexToInteger(index) - #TODO: mardi similaire à lundi mercredi jeudi aussi ...etc + #TODO: mardi similaire à lundi mercredi jeudi aussi ...etc ==> "isSimilarDay()..." if (!same_season) { #take all similar days in recent past @@ -73,36 +73,36 @@ getSimilarDaysIndices = function(index, limit, same_season, data=NULL) return ( rep(index,nb_days) - 7*seq_len(nb_days) ) } - - #TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP #Look for similar days in similar season + nb_days = min( (index-1) %/% 7, limit) + i = index - 7 days = c() - i = index + month_ref = as.POSIXlt(data$getTime(index)[1])$mon + 1 while (i >= 1 && length(days) < limit) { - if (i < index) - { + if (isSameSeason(as.POSIXlt(data$getTime(i)[1])$mon + 1, month_ref)) days = c(days, i) - #look in the "future of the past" - for (j in 1:4) - days = c(days, i+7*j) - } - #...and in the "past of the past" - for (j in 1:4) - { - if (i - 7*j >= 1) - days = c(days, i-7*j) - } - # TODO: exact computation instead of -364 - # 364 = closest multiple of 7 to 365 - drift along the years... but not so many years so OK - i = i - 364 - - + i = i-7 } + return ( days ) +} - return ( days[1:min(limit,length(days))] ) +#TODO: use data... 12-12-1-2 CH, 3-4-9-10 EP et le reste NP +isSameSeason = function(month, month_ref) +{ + if (month_ref %in% c(11,12,1,2)) + return (month %in% c(11,12,1,2)) + if (month_ref %in% c(3,4,9,10)) + return (month %in% c(3,4,9,10)) + return (month %in% c(5,6,7,8)) } +#TODO: +#distinction lun-jeudi, puis ven, sam, dim +#isSameDay = function(day, day_ref) +#{ +# if (day_ref == + #' getNoNA2 #' #' Get indices in data of no-NA series followed by no-NA, within [first,last] range. diff --git a/reports/report.ipynb b/reports/report.ipynb index 899fbf6..3dac8ec 100644 --- a/reports/report.ipynb +++ b/reports/report.ipynb @@ -59,8 +59,6 @@ "editable": true }, "source": [ - "\n", - "\n", "

Pollution par chauffage

" ] }, @@ -75,12 +73,34 @@ "outputs": [], "source": [ "reload(\"../pkg\")\n", - "p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n", - "p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n", - "p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n", + "#p1 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"exo\")\n", + "#p2 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"endo\")\n", + "#p3 = computeForecast(data, indices_ch, \"Neighbors\", \"Zero\", horizon=H, simtype=\"mix\")\n", "p4 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"exo\")\n", "p5 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"endo\")\n", - "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")\n" + "p6 = computeForecast(data, indices_ch, \"Neighbors2\", \"Zero\", horizon=H, simtype=\"mix\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "getSimilarDaysIndices(1000,10,TRUE,data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "as.POSIXlt(data$getTime(1000)[1])" ] }, { @@ -110,17 +130,7 @@ }, "outputs": [], "source": [ - "e_nn_exo = computeError(data, p_nn_exo, 3)\n", - "e_nn_mix = computeError(data, p_nn_mix, 3)\n", - "e_az = computeError(data, p_az, 3)\n", - "e_pz = computeError(data, p_pz, 3)\n", - "options(repr.plot.width=9, repr.plot.height=7)\n", - "plotError(list(e_nn_mix, e_pz, e_az, e_nn_exo), cols=c(1,2,colors()[258], 4))\n", - "\n", - "# Noir: neighbors_mix, bleu: neighbors_exo, vert: moyenne, rouge: persistence\n", - "\n", - "i_np = which.min(e_nn_exo$abs$indices)\n", - "i_p = which.max(e_nn_exo$abs$indices)" + "plotError(list(e4,e1,e2,e3, e5,e6), cols=c(1,2,3,4,5,6))" ] }, {