X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=pkg%2FR%2FgetData.R;h=9ed021714c45175f40f12fae62300e06e3519abe;hb=7f90df63d1ac659d450a180b50b994a76d5fc7b9;hp=da4b4594bd30f2b761e55c37917be346cb0cb2a9;hpb=f17665c7d3da672163779da686d9f4d1ebad31f9;p=talweg.git diff --git a/pkg/R/getData.R b/pkg/R/getData.R index da4b459..9ed0217 100644 --- a/pkg/R/getData.R +++ b/pkg/R/getData.R @@ -2,8 +2,7 @@ #' #' @description Take in input data frames and/or files containing raw data, and timezones, and #' output a Data object, roughly corresponding to a list where each cell contains all value -#' for one day (see \code{?Data}). Current limitation: series (in working_tz) must start at -#' right after midnight (to keep in sync with exogenous vars) +#' for one day (see \code{?Data}). #' #' @param ts_data Time-series, as a data frame (DB style: 2 columns, first is date/time, #' second is value) or a CSV file @@ -21,7 +20,7 @@ #' @examples #' ts_data = read.csv(system.file("extdata","pm10_mesures_H_loc.csv",package="talweg")) #' exo_data = read.csv(system.file("extdata","meteo_extra_noNAs.csv",package="talweg")) -#' getData(ts_data, exo_data, input_tz="Europe/Paris", working_tz="Europe/Paris", limit=150) +#' data = getData(ts_data, exo_data, limit=120) #' @export getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:%M", working_tz="GMT", predict_at=0, limit=Inf) @@ -50,14 +49,16 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% read.csv(ts_data) else ts_data + # Convert to the desired timezone (usually "GMT" or "Europe/Paris") + formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz) + ts_df[,1] = format(as.POSIXct(formatted_dates_POSIXlt, tz=input_tz), tz=working_tz, usetz=TRUE) + exo_df = if (is.character(exo_data)) read.csv(exo_data) else exo_data - # Convert to the desired timezone (usually "GMT" or "Europe/Paris") - formatted_dates_POSIXlt = strptime(as.character(ts_df[,1]), date_format, tz=input_tz) - ts_df[,1] = format(as.POSIXct(formatted_dates_POSIXlt), tz=working_tz, usetz=TRUE) + # Times in exogenous variables file are ignored: no conversions required line = 1 #index in PM10 file (24 lines for 1 cell) nb_lines = nrow(ts_df) @@ -88,13 +89,12 @@ getData = function(ts_data, exo_data, input_tz="GMT", date_format="%d/%m/%Y %H:% break i = i + 1 } - if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(1))) + if (length(data$getCenteredSerie(1)) < length(data$getCenteredSerie(2))) data$removeFirst() - if (length(data$getCenteredSerie( data$getSize() )) < - length(data$getCenteredSerie( data$getSize()-1 ))) + if (length(data$getCenteredSerie(data$getSize())) + < length(data$getCenteredSerie(data$getSize()-1))) { data$removeLast() } - data }