X-Git-Url: https://git.auder.net/?p=talweg.git;a=blobdiff_plain;f=pkg%2FR%2Futils.R;h=bb769967b328edba87f583b74eabb2e4df54a916;hp=64c3c0a1b5c586e009cc8ae44ff271a0c1a7f2b1;hb=638f27f4296727aff62b56643beb9f42aa5b57ef;hpb=98e958cab563866f8e00886b54336018a2e8bc97 diff --git a/pkg/R/utils.R b/pkg/R/utils.R index 64c3c0a..bb76996 100644 --- a/pkg/R/utils.R +++ b/pkg/R/utils.R @@ -1,14 +1,14 @@ #' dateIndexToInteger #' -#' Transform a (potential) date index into an integer (relative to data) +#' Transform a (potential) date index into an integer (relative to data beginning). #' #' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @param data Object of class Data, output of \code{getData()} #' #' @export dateIndexToInteger = function(index, data) { - #works on integers too: trust input + # Works on integers too: trust input if (is.numeric(index)) index = as.integer(index) if (is.integer(index)) @@ -16,7 +16,8 @@ dateIndexToInteger = function(index, data) if (inherits(index, "Date") || is.character(index)) { - tryCatch(indexAsDate <- as.Date(index), error=function(e) stop("Unrecognized index format")) + tryCatch(indexAsDate <- as.Date(index), + error=function(e) stop("Unrecognized index format")) #TODO: tz arg to difftime ? integerIndex <- round( as.numeric( difftime(indexAsDate, as.Date(data$getTime(1)[1])) ) ) + 1 @@ -29,15 +30,14 @@ dateIndexToInteger = function(index, data) #' integerIndexToDate #' -#' Transform an integer index to date index (relative to data) +#' Transform an integer index (relative to data beginning) into a date index. #' -#' @param index Date (or integer) index -#' @param data Object of class \code{Data} +#' @inheritParams dateIndexToInteger #' #' @export integerIndexToDate = function(index, data) { - #works on dates too: trust input + # Works on dates too: trust input if (is.character(index)) index = as.Date(index) if (is(index,"Date")) @@ -53,74 +53,96 @@ integerIndexToDate = function(index, data) #' getSimilarDaysIndices #' -#' Find similar days indices in the past +#' Find similar days indices in the past; at least same type of day in the week: +#' monday=tuesday=wednesday=thursday != friday != saturday != sunday. #' #' @param index Day index (numeric or date) +#' @param data Reference dataset, object output of \code{getData} #' @param limit Maximum number of indices to return -#' @param same_seaon Should the indices correspond to day in same season? +#' @param same_season Should the indices correspond to day in same season? +#' @param days_in Optional set to intersect with results (NULL to discard) #' #' @export -getSimilarDaysIndices = function(index, limit, same_season) +getSimilarDaysIndices = function(index, data, limit, same_season, + days_in=NULL, operational=TRUE) { - index = dateIndexToInteger(index) - - #TODO: mardi similaire à lundi mercredi jeudi aussi ...etc - if (!same_season) - { - #take all similar days in recent past - nb_days = min( (index-1) %/% 7, limit) - return ( rep(index,nb_days) - 7*seq_len(nb_days) ) - } + index = dateIndexToInteger(index, data) - #Look for similar days in similar season (+/- 30 days) + # Look for similar days (optionally in same season) days = c() - i = index - while (i >= 1 && length(days) < limit) + dt_ref = as.POSIXlt(data$getTime(index)[1]) #first date-time of current day + day_ref = dt_ref$wday #1=monday, ..., 6=saturday, 0=sunday + month_ref = as.POSIXlt(data$getTime(index)[1])$mon+1 #month in 1...12 + i = index - 1 + if (!operational) + j = index + 1 + while (length(days) < min( limit, ifelse(is.null(days_in),Inf,length(days_in)) )) { - if (i < index) + if (i >= 1) { - days = c(days, i) - #look in the "future of the past" - for (j in 1:4) - days = c(days, i+7*j) + dt = as.POSIXlt(data$getTime(i)[1]) + if ((is.null(days_in) || i %in% days_in) && .isSameDay(dt$wday, day_ref)) + { + if (!same_season || .isSameSeason(dt$mon+1, month_ref)) + days = c(days, i) + } + i = i - 1 } - #...and in the "past of the past" - for (j in 1:4) + if (!operational && j <= data$getSize()) { - if (i - 7*j >= 1) - days = c(days, i-7*j) + dt = as.POSIXlt(data$getTime(j)[1]) + if ((is.null(days_in) || j %in% days_in) && .isSameDay(dt$wday, day_ref)) + { + if (!same_season || .isSameSeason(dt$mon+1, month_ref)) + days = c(days, j) + } + j = j + 1 } - # TODO: exact computation instead of -364 - # 364 = closest multiple of 7 to 365 - drift along the years... but not so many years so OK - i = i - 364 } + return ( days ) +} - return ( days[1:min(limit,length(days))] ) +# isSameSeason +# +# Check if two months fall in the same "season" (defined by estimated pollution rate). +# +# @param month Month index to test +# @param month_ref Month to compare to +# +.isSameSeason = function(month, month_ref) +{ + if (month_ref %in% c(11,12,1,2)) #~= mid-polluted + return (month %in% c(11,12,1,2)) + if (month_ref %in% c(3,4,9,10)) #~= high-polluted + return (month %in% c(3,4,9,10)) + return (month %in% c(5,6,7,8)) #~= non polluted } -#' getSerie -#' -#' Return a time-serie from its centered version + level -#' -#' @param data A list as returned by \code{getData} -#' @param index The index to return -#' -#' @export -getSerie = function(data, index) - data[[index]]$centered_serie + data[[index]]$level +# isSameDay +# +# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials. +# +# @param day Day index to test +# @param day_ref Day index to compare to +# +.isSameDay = function(day, day_ref) +{ + if (day_ref %in% 1:4) + return (day %in% 1:4) + return (day == day_ref) +} -#' getNoNA2 -#' -#' Get indices in data of no-NA series followed by no-NA, within [first,last] range. -#' -#' @param data Object of class Data -#' @param first First index (included) -#' @param last Last index (included) -#' -#' @export -getNoNA2 = function(data, first, last) +# getNoNA2 +# +# Get indices in data of no-NA series preceded by no-NA, within [first,last] range. +# +# @inheritParams dateIndexToInteger +# @param first First index (included) +# @param last Last index (included) +# +.getNoNA2 = function(data, first, last) { (first:last)[ sapply(first:last, function(i) - !any( is.na(data$getCenteredSerie(i)) | is.na(data$getCenteredSerie(i+1)) ) + !any( is.na(data$getSerie(i-1)) | is.na(data$getSerie(i)) ) ) ] }