# Reconstructed from the post-patch side of the diff of pkg/R/utils.R
# (blob bb76996, commit 638f27f).

#' dateIndexToInteger
#'
#' Transform a (potential) date index into an integer (relative to data beginning).
#'
#' @param index Date (or integer) index
#' @param data Object of class Data, output of \code{getData()}
#'
#' @return The integer index (indices) matching \code{index}. Integer input is
#'   trusted and returned as is (no range check).
#'
#' @export
dateIndexToInteger = function(index, data)
{
  # Dates are tested first, so that a Date stored as integer is still converted
  if (inherits(index, "Date") || is.character(index))
  {
    indexAsDate = tryCatch(as.Date(index),
      error=function(e) stop("Unrecognized index format"))
    if (anyNA(indexAsDate))
      stop("Unrecognized index format")

    #TODO: tz arg to difftime ?
    # Explicit units: never depend on the automatic unit selection of difftime()
    firstDate = as.Date(data$getTime(1)[1])
    integerIndex = as.integer(
      round( as.numeric( difftime(indexAsDate, firstDate, units="days") ) ) ) + 1L

    # all(): index may hold several dates (scalar && would fail on vectors)
    if (all(integerIndex >= 1 & integerIndex <= data$getSize()))
      return (integerIndex)
    stop("Date outside data range")
  }

  # Works on integers too: trust input
  if (is.numeric(index))
    index = as.integer(index)
  if (is.integer(index))
    return (index)

  stop("Unrecognized index format")
}

#' integerIndexToDate
#'
#' Transform an integer index (relative to data beginning) into a date index.
#'
#' @inheritParams dateIndexToInteger
#'
#' @return An object of class Date. Date (or character) input is trusted and
#'   returned as a Date; for integer input only the first element is used.
#'
#' @export
integerIndexToDate = function(index, data)
{
  # Works on dates too: trust input
  if (is.character(index))
    index = as.Date(index)
  if (inherits(index, "Date"))
    return (index)

  index = index[1]
  if (is.numeric(index))
    index = as.integer(index)
  if (!is.integer(index))
    stop("'index' should be a date or integer")
  as.Date( data$getTime(index)[1] )
}

#' getSimilarDaysIndices
#'
#' Find similar days indices in the past; at least same type of day in the week:
#' monday=tuesday=wednesday=thursday != friday != saturday != sunday.
#'
#' @param index Day index (numeric or date)
#' @param data Reference dataset, object output of \code{getData}
#' @param limit Maximum number of indices to return
#' @param same_season Should the indices correspond to day in same season?
#' @param days_in Optional set to intersect with results (NULL to discard)
#' @param operational If TRUE (default), only days strictly before \code{index}
#'   are considered; if FALSE, days after \code{index} are considered too,
#'   alternating one step backward and one step forward.
#'
#' @return Vector of at most \code{limit} day indices, closest days first;
#'   NULL if no similar day is found.
#'
#' @export
getSimilarDaysIndices = function(index, data, limit, same_season,
  days_in=NULL, operational=TRUE)
{
  index = dateIndexToInteger(index, data)

  dt_ref = as.POSIXlt(data$getTime(index)[1]) #first date-time of current day
  day_ref = dt_ref$wday #1=monday, ..., 6=saturday, 0=sunday
  month_ref = dt_ref$mon + 1 #month in 1...12

  # No more than length(days_in) indices can match when days_in is provided
  max_days = if (is.null(days_in)) limit else min(limit, length(days_in))
  # Forward search is disabled in operational mode (j starts above j_max)
  j_max = if (operational) index else data$getSize()

  # Is day k of the same type as the reference day (and optionally same season) ?
  isSimilar = function(k)
  {
    if (!is.null(days_in) && !(k %in% days_in))
      return (FALSE)
    dt = as.POSIXlt(data$getTime(k)[1])
    .isSameDay(dt$wday, day_ref) &&
      (!same_season || .isSameSeason(dt$mon + 1, month_ref))
  }

  # Look for similar days (optionally in same season)
  days = c()
  i = index - 1
  j = index + 1
  # Stop when enough days are found OR when there is nothing left to explore
  # (without the second condition the loop would never end on short datasets)
  while (length(days) < max_days && (i >= 1 || j <= j_max))
  {
    if (i >= 1)
    {
      if (isSimilar(i))
        days = c(days, i)
      i = i - 1
    }
    # Check the bound again: the backward step may have just reached it
    if (j <= j_max && length(days) < max_days)
    {
      if (isSimilar(j))
        days = c(days, j)
      j = j + 1
    }
  }
  days
}

# isSameSeason
#
# Check if two months fall in the same "season" (defined by estimated pollution rate).
#
# @param month Month index to test
# @param month_ref Month to compare to
#
.isSameSeason = function(month, month_ref)
{
  seasons = list(
    c(11,12,1,2), #~= mid-polluted
    c(3,4,9,10),  #~= high-polluted
    c(5,6,7,8))   #~= non polluted
  for (season in seasons[1:2])
  {
    if (month_ref %in% season)
      return (month %in% season)
  }
  # Any other reference month is compared against the last group
  month %in% seasons[[3]]
}

# isSameDay
#
# Monday=Tuesday=Wednesday=Thursday ; Friday, Saturday, Sunday: specials.
#
# @param day Day index to test
# @param day_ref Day index to compare to
#
.isSameDay = function(day, day_ref)
{
  if (day_ref %in% 1:4)
    return (day %in% 1:4)
  day == day_ref
}

# getNoNA2
#
# Get indices in data of no-NA series preceded by no-NA, within [first,last] range.
#
# @inheritParams dateIndexToInteger
# @param first First index (included)
# @param last Last index (included)
#
.getNoNA2 = function(data, first, last)
{
  # NOTE(review): with first == 1 this reads serie 0; result then depends on
  # what data$getSerie(0) returns -- confirm against the Data class.
  indices = first:last
  noNA = vapply(indices, function(i)
    !any( is.na(data$getSerie(i-1)) | is.na(data$getSerie(i)) ),
    logical(1))
  indices[noNA]
}