From: Benjamin Auder Date: Sun, 9 Apr 2017 21:34:35 +0000 (+0200) Subject: TODO: unit tests for simil days X-Git-Url: https://git.auder.net/doc/html/index.html?a=commitdiff_plain;h=3ddf1c12af0c167fe7d3bb59e63258550270cfc5;p=talweg.git TODO: unit tests for simil days --- diff --git a/pkg/R/Data.R b/pkg/R/Data.R index c193f7d..92ba2c1 100644 --- a/pkg/R/Data.R +++ b/pkg/R/Data.R @@ -25,7 +25,8 @@ #' \item{\code{getStdHorizon()}}{ #' Number of time steps from serie[1] until midnight} #' \item{\code{append(time, serie, exo, exo_hat)}}{ -#' Measured data for given vector of times + exogenous predictions from last midgnight. +#' Measured data for given vector of times + exogenous predictions from +#' last midgnight.} #' \item{\code{getTime(index)}}{ #' Times (vector) at specified index.} #' \item{\code{getCenteredSerie(index)}}{ diff --git a/pkg/R/F_Average.R b/pkg/R/F_Average.R index 8a33111..8f81747 100644 --- a/pkg/R/F_Average.R +++ b/pkg/R/F_Average.R @@ -9,7 +9,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Average +#' @aliases F_Average #' AverageForecaster = R6::R6Class("AverageForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Neighbors.R b/pkg/R/F_Neighbors.R index 12595d9..ffb068f 100644 --- a/pkg/R/F_Neighbors.R +++ b/pkg/R/F_Neighbors.R @@ -30,7 +30,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Neighbors +#' @aliases F_Neighbors #' NeighborsForecaster = R6::R6Class("NeighborsForecaster", inherit = Forecaster, @@ -46,7 +46,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", return (NA) # Determine indices of no-NAs days followed by no-NAs tomorrows - fdays = getNoNA2(data, max(today-memory,1), today-1) + fdays = .getNoNA2(data, max(today-memory,1), today-1) # Get optional args local = ifelse(hasArg("local"), list(...)$local, TRUE) #same level + season? @@ -118,33 +118,14 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", if (local) { - # Neighbors: days in "same season"; TODO: 60 == magic number... + # TODO: 60 == magic number fdays = getSimilarDaysIndices(today, data, limit=60, same_season=TRUE, days_in=fdays_cut) if (length(fdays) <= 1) return (NA) - levelToday = data$getLevel(today) - distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) - #TODO: 2, 10, 3, 12 magic numbers here... - dist_thresh = 2 - min_neighbs = min(10,length(fdays)) - repeat - { - same_pollution = (distances <= dist_thresh) - nb_neighbs = sum(same_pollution) - if (nb_neighbs >= min_neighbs) #will eventually happen - break - dist_thresh = dist_thresh + 3 - } - fdays = fdays[same_pollution] - max_neighbs = 12 - if (nb_neighbs > max_neighbs) - { - # Keep only max_neighbs closest neighbors - fdays = fdays[ - sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] - } - if (length(fdays) == 1) #the other extreme... + # TODO: 10, 12 == magic numbers + fdays = .getConstrainedNeighbs(today,data,fdays,min_neighbs=10,max_neighbs=12) + if (length(fdays) == 1) { if (final_call) { @@ -170,13 +151,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", mean(delta^2) }) - sd_dist = sd(distances2) - if (sd_dist < .25 * sqrt(.Machine$double.eps)) - { -# warning("All computed distances are very close: stdev too small") - sd_dist = 1 #mostly for tests... FIXME: - } - simils_endo = exp(-distances2/(sd_dist*window_endo^2)) + simils_endo <- .computeSimils(distances2, window_endo) } if (simtype == "exo" || simtype == "mix") @@ -203,13 +178,7 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", delta %*% sigma_inv %*% delta }) - sd_dist = sd(distances2) - if (sd_dist < .25 * sqrt(.Machine$double.eps)) - { -# warning("All computed distances are very close: stdev too small") - sd_dist = 1 #mostly for tests... FIXME: - } - simils_exo = exp(-distances2/(sd_dist*window_exo^2)) + simils_exo <- .computeSimils(distances2, window_exo) } similarities = @@ -246,3 +215,72 @@ NeighborsForecaster = R6::R6Class("NeighborsForecaster", } ) ) + +#' getNoNA2 +#' +#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. +#' +#' @inheritParams dateIndexToInteger +#' @param first First index (included) +#' @param last Last index (included) +#' +.getNoNA2 = function(data, first, last) +{ + (first:last)[ sapply(first:last, function(i) + !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) + ) ] +} + +#' getConstrainedNeighbs +#' +#' Get indices of neighbors of similar pollution level (among same season + day type). +#' +#' @param today Index of current day +#' @param data Object of class Data +#' @param fdays Current set of "first days" (no-NA pairs) +#' @param min_neighbs Minimum number of points in a neighborhood +#' @param max_neighbs Maximum number of points in a neighborhood +#' +.getConstrainedNeighbs = function(today, data, fdays, min_neighbs=10, max_neighbs=12) +{ + levelToday = data$getLevel(today) + distances = sapply(fdays, function(i) abs(data$getLevel(i)-levelToday)) + #TODO: 2, +3 : magic numbers + dist_thresh = 2 + min_neighbs = min(min_neighbs,length(fdays)) + repeat + { + same_pollution = (distances <= dist_thresh) + nb_neighbs = sum(same_pollution) + if (nb_neighbs >= min_neighbs) #will eventually happen + break + dist_thresh = dist_thresh + 3 + } + fdays = fdays[same_pollution] + max_neighbs = 12 + if (nb_neighbs > max_neighbs) + { + # Keep only max_neighbs closest neighbors + fdays = fdays[ + sort(distances[same_pollution],index.return=TRUE)$ix[1:max_neighbs] ] + } + fdsays +} + +#' compute similarities +#' +#' Apply the gaussian kernel on computed squared distances. +#' +#' @param distances2 Squared distances +#' @param window Window parameter for the kernel +#' +.computeSimils <- function(distances2, window) +{ + sd_dist = sd(distances2) + if (sd_dist < .25 * sqrt(.Machine$double.eps)) + { +# warning("All computed distances are very close: stdev too small") + sd_dist = 1 #mostly for tests... FIXME: + } + exp(-distances2/(sd_dist*window^2)) +} diff --git a/pkg/R/F_Persistence.R b/pkg/R/F_Persistence.R index eaeca98..fa5f99f 100644 --- a/pkg/R/F_Persistence.R +++ b/pkg/R/F_Persistence.R @@ -10,7 +10,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Persistence +#' @aliases F_Persistence #' PersistenceForecaster = R6::R6Class("PersistenceForecaster", inherit = Forecaster, diff --git a/pkg/R/F_Zero.R b/pkg/R/F_Zero.R index 977cba9..2c9a7b7 100644 --- a/pkg/R/F_Zero.R +++ b/pkg/R/F_Zero.R @@ -5,7 +5,7 @@ #' #' @docType class #' @format R6 class, inherits Forecaster -#' @alias F_Zero +#' @aliases F_Zero #' ZeroForecaster = R6::R6Class("ZeroForecaster", inherit = Forecaster, diff --git a/pkg/R/J_Neighbors.R b/pkg/R/J_Neighbors.R index 5a818ca..7f66830 100644 --- a/pkg/R/J_Neighbors.R +++ b/pkg/R/J_Neighbors.R @@ -8,7 +8,7 @@ #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' -#' @alias J_Neighbors +#' @aliases J_Neighbors #' getNeighborsJumpPredict = function(data, today, memory, horizon, params, ...) { diff --git a/pkg/R/J_Persistence.R b/pkg/R/J_Persistence.R index 37d05fe..9e56742 100644 --- a/pkg/R/J_Persistence.R +++ b/pkg/R/J_Persistence.R @@ -8,7 +8,7 @@ #' @inheritParams computeForecast #' @inheritParams getZeroJumpPredict #' -#' @alias J_Persistence +#' @aliases J_Persistence #' getPersistenceJumpPredict = function(data, today, memory, horizon, params, ...) { diff --git a/pkg/R/J_Zero.R b/pkg/R/J_Zero.R index 065226e..fb15e3d 100644 --- a/pkg/R/J_Zero.R +++ b/pkg/R/J_Zero.R @@ -6,7 +6,7 @@ #' @param today Index of the current day (predict tomorrow) #' @param params Optional parameters computed by the main forecaster #' -#' @alias J_Zero +#' @aliases J_Zero #' getZeroJumpPredict = function(data, today, memory, horizon, params, ...) { diff --git a/pkg/R/computeForecast.R b/pkg/R/computeForecast.R index 23ccb04..a4a539a 100644 --- a/pkg/R/computeForecast.R +++ b/pkg/R/computeForecast.R @@ -31,9 +31,10 @@ #' @examples #' ts_data <- system.file("extdata","pm10_mesures_H_loc.csv",package="talweg") #' exo_data <- system.file("extdata","meteo_extra_noNAs.csv",package="talweg") -#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", predict_at=7) -#' pred <- computeForecast(data, 2200:2230, "Persistence", "Zero", -#' memory=500, horizon=12, ncores=1) +#' data <- getData(ts_data, exo_data, input_tz="GMT", working_tz="GMT", +#' predict_at=7, limit=200) +#' pred <- computeForecast(data, 100:130, "Persistence", "Zero", +#' memory=50, horizon=12, ncores=1) #' \dontrun{#Sketch for real-time mode: #' data <- Data$new() #' forecaster <- MyForecaster$new(myJumpPredictFunc) diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 3124881..5ba72f0 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -117,19 +117,3 @@ getSimilarDaysIndices = function(index, data, limit, same_season, days_in=NULL) return (day <= 4) return (day == day_ref) } - -#' getNoNA2 -#' -#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. -#' -#' @inheritParams dateIndexToInteger -#' @param first First index (included) -#' @param last Last index (included) -#' -#' @export -getNoNA2 = function(data, first, last) -{ - (first:last)[ sapply(first:last, function(i) - !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) - ) ] -} diff --git a/pkg/tests/testthat.R b/pkg/tests/testthat.R index 2a60ff4..97e5ad5 100644 --- a/pkg/tests/testthat.R +++ b/pkg/tests/testthat.R @@ -1,4 +1,6 @@ library(testthat) -library(talweg) + +load_all() #because some non-exported functions +#library(talweg) test_check("talweg") diff --git a/pkg/tests/testthat/test.dateIndexToInteger.R b/pkg/tests/testthat/test-DateIntegerConv.R similarity index 68% rename from pkg/tests/testthat/test.dateIndexToInteger.R rename to pkg/tests/testthat/test-DateIntegerConv.R index d3c014a..99e5fa5 100644 --- a/pkg/tests/testthat/test.dateIndexToInteger.R +++ b/pkg/tests/testthat/test-DateIntegerConv.R @@ -1,4 +1,4 @@ -context("Check that date <--> integer indexes conversions work") +context("Date <--> integer conversions") ts_data = system.file("testdata","ts_test.csv",package="talweg") exo_data = system.file("testdata","exo_test.csv",package="talweg") @@ -7,7 +7,7 @@ data0 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M data7 <<- getData(ts_data, exo_data, input_tz="GMT", date_format="%Y-%m-%d %H:%M", working_tz="GMT", predict_at=7, limit=Inf) -test_that("dateIndexToInteger", +test_that("dateIndexToInteger works as expected", { expect_identical( dateIndexToInteger("2007-01-01",data0), 1 ) expect_identical( dateIndexToInteger("2007-01-02",data0), 2 ) @@ -22,7 +22,7 @@ test_that("dateIndexToInteger", expect_identical( dateIndexToInteger("2007-05-31",data7), 151 ) }) -test_that("integerIndexToDate", +test_that("integerIndexToDate works as expected", { expect_identical( integerIndexToDate( 1,data0), as.Date("2007-01-01") ) expect_identical( integerIndexToDate( 2,data0), as.Date("2007-01-02") ) @@ -53,24 +53,24 @@ test_that("dateIndexToInteger(integerIndexToDate) == Id", test_that("integerIndexToDate(dateIndexToInteger) == Id", { - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-01",data0),data0), as.Date("2007-01-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-01",data7),data7), as.Date("2007-01-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-02",data0),data0), as.Date("2007-01-02") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-01-02",data7),data7), as.Date("2007-01-02") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), as.Date("2007-02-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), as.Date("2007-02-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), as.Date("2007-03-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), as.Date("2007-03-01") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), as.Date("2007-05-31") ) - expect_identical( - integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), as.Date("2007-05-31") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-01",data0),data0), + as.Date("2007-01-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-01",data7),data7), + as.Date("2007-01-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-02",data0),data0), + as.Date("2007-01-02") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-01-02",data7),data7), + as.Date("2007-01-02") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), + as.Date("2007-02-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-02-01",data0),data0), + as.Date("2007-02-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), + as.Date("2007-03-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-03-01",data0),data0), + as.Date("2007-03-01") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), + as.Date("2007-05-31") ) + expect_identical(integerIndexToDate(dateIndexToInteger("2007-05-31",data0),data0), + as.Date("2007-05-31") ) }) diff --git a/pkg/tests/testthat/test.Forecaster.R b/pkg/tests/testthat/test-Forecaster.R similarity index 93% rename from pkg/tests/testthat/test.Forecaster.R rename to pkg/tests/testthat/test-Forecaster.R index a02e6e9..9b3eaa0 100644 --- a/pkg/tests/testthat/test.Forecaster.R +++ b/pkg/tests/testthat/test-Forecaster.R @@ -43,16 +43,20 @@ test_that("Average method behave as expected", test_that("Persistence method behave as expected", { #Situation A: +Zero; (generally) correct if jump, wrong otherwise - pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, same_day=TRUE) - pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, same_day=FALSE) + pred00_sd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + same_day=TRUE) + pred00_dd = computeForecast(data00, indices, "Persistence", "Zero", Inf, 24, + same_day=FALSE) for (i in 1:7) { expect_equal(pred00_sd$getSerie(i), rep(pred_order[i],24)) expect_equal(pred00_dd$getSerie(i), rep(pred_order[i],24)) } - pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, same_day=TRUE) - pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, same_day=FALSE) + pred13_sd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + same_day=TRUE) + pred13_dd = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + same_day=FALSE) for (i in 2:6) { expect_equal(pred13_sd$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) @@ -105,18 +109,21 @@ test_that("Persistence method behave as expected", test_that("Neighbors method behave as expected", { #Situation A: +Zero; correct if jump, wrong otherwise - pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, simtype="mix") + pred00 = computeForecast(data00, indices, "Neighbors", "Zero", Inf, 24, + simtype="mix") for (i in 1:7) expect_equal(pred00$getSerie(i), rep(pred_order[i],24)) - pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, simtype="mix") + pred13 = computeForecast(data13, indices, "Persistence", "Zero", Inf, 24, + simtype="mix") for (i in 1:7) expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) #Situation B: +Neighbors, always predict bad (small, averaged) jump - pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, simtype="endo") - #Concerning weights, there are 12+(1 if i>=2) gaps at -6 and 90-12+(i-2 if i>=3) gaps at 1 - #Thus, predicted jump is respectively + pred00 = computeForecast(data00, indices, "Neighbors", "Neighbors", Inf, 24, + simtype="endo") + #Concerning weights, there are 12+(1 if i>=2) gaps at -6 and 90-12+(i-2 if i>=3) gaps + #at 1. Thus, predicted jump is respectively # (12*-6+78)/90 = 0.06666667 # (13*-6+78)/91 = 0 # (13*-6+79)/92 = 0.01086957 @@ -128,8 +135,10 @@ test_that("Neighbors method behave as expected", for (i in 1:7) expect_equal(pred00$getSerie(i), rep(pred_order[i]+jumps[i],24)) - #Next lines commented out because too unpredictable results (tendency to flatten everything...) -# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, simtype="endo") + #Next lines commented out because too unpredictable results + #(tendency to flatten everything...) +# pred13 = computeForecast(data13, indices, "Neighbors", "Neighbors", Inf, 24, +# simtype="endo") # for (i in 1:7) # expect_equal(pred13$getSerie(i), c( rep(i,11), rep(i%%7+1,13) ) ) diff --git a/pkg/tests/testthat/test.computeFilaments.R b/pkg/tests/testthat/test-computeFilaments.R similarity index 97% rename from pkg/tests/testthat/test.computeFilaments.R rename to pkg/tests/testthat/test-computeFilaments.R index ec39340..355d58d 100644 --- a/pkg/tests/testthat/test.computeFilaments.R +++ b/pkg/tests/testthat/test-computeFilaments.R @@ -1,4 +1,4 @@ -context("Check that computeFilaments behaves as expected") +context("computeFilaments") #shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3 I = function(i) diff --git a/pkg/tests/testthat/test-similarDays.R b/pkg/tests/testthat/test-similarDays.R new file mode 100644 index 0000000..0d33fce --- /dev/null +++ b/pkg/tests/testthat/test-similarDays.R @@ -0,0 +1,16 @@ +context("Get similar days") + +itestthat("getSimilarDaysIndices works as expected", +{ + getSimilarDaysIndices(index, data, limit, same_season, days_in=NULL) + #... +}) +{ + index = dateIndexToInteger(index, data) + +testthat("getConstrainedNeighbs works as expected", +{ + .getConstrainedNeighbs(today, data, fdays, min_neighbs=10, max_neighbs=12) + #... +}) +