#epclust/R/computeCoeffs.R
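
#Compute the wavelet coefficients of the next chunk of series.
#  data: either a full data.frame of series (ID in the first column), a user
#        function returning the next curves given their ranks, or a text
#        connection read incrementally
#  index: 1-based position of the first series of the chunk
#  nb_series_per_chunk: maximal number of series to process in one chunk
#  wf: wavelet filter name passed to wavelets::dwt() (e.g. "d4", "haar")
#Returns a data.frame of per-scale coefficients, or NULL when no series remain.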
computeCoeffs = function(data, index, nb_series_per_chunk, wf)
{
    coeffs_chunk = NULL
    if (is.data.frame(data) && index <= nrow(data))
    {
        #full data matrix: take the next nb_series_per_chunk rows (or fewer at the end)
        coeffs_chunk = curvesToCoeffs(
            data[index:(min(index+nb_series_per_chunk-1,nrow(data))),], wf)
    }
    else if (is.function(data))
    {
        #custom user function retrieving the next curves by rank, probably reading from a DB
        coeffs_chunk = curvesToCoeffs( data(rank=(index-1)+seq_len(nb_series_per_chunk)), wf )
    }
    else if (inherits(data, "connection"))
    {
        #incremental connection ; TODO: more efficient way to parse than using a temp file
        ascii_lines = readLines(data, nb_series_per_chunk)
        if (length(ascii_lines) > 0)
        {
            series_chunk_file = ".series_chunk"
            writeLines(ascii_lines, series_chunk_file)
            coeffs_chunk = curvesToCoeffs( read.csv(series_chunk_file), wf )
            unlink(series_chunk_file)
        }
    }
    coeffs_chunk
}
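
#Usage sketch (illustrative only, not part of the package): feeding a toy
#data.frame through computeCoeffs() chunk by chunk. The toy data, the chunk
#size of 10 and the "d4" filter are arbitrary assumptions for the example;
#the if (FALSE) guard keeps it from running when this file is sourced.
if (FALSE)
{
    toy_series = data.frame(id=1:50, matrix(rnorm(50*240), nrow=50))
    index = 1
    repeat
    {
        coeffs = computeCoeffs(toy_series, index, 10, "d4")
        if (is.null(coeffs))
            break
        #each row of 'coeffs': series ID, then one coefficient per wavelet scale
        index = index + 10
    }
}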

#NOTE: the series ID is expected in the first column of 'series' and is kept
#as the first column of the returned coefficients
curvesToCoeffs = function(series, wf)
{
    if (!requireNamespace("wavelets", quietly=TRUE))
        stop("Couldn't load the 'wavelets' package")
    L = ncol(series)
    D = ceiling( log2(L-1) )
    nb_sample_points = 2^D
    #TODO: parallel::parApply() ?!
    res = apply(series, 1, function(x) {
        #interpolate the curve (without its ID) onto a power-of-two grid
        interpolated_curve = spline(1:(L-1), x[2:L], n=nb_sample_points)$y
        W = wavelets::dwt(interpolated_curve, filter=wf, n.levels=D)@W
        #L2 norm of the detail coefficients at each resolution level, coarsest first
        nrj_coeffs = rev( sapply( W, function(v) sqrt( sum(v^2) ) ) )
        c(x[1], nrj_coeffs)
    })
    #apply() returns one column per series: transpose so IDs stay in the first column
    as.data.frame(t(res))
}
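
#Quick sketch of the output shape (assumes 'wavelets' is installed; the
#32-point toy curves and the "haar" filter are only illustrative). Each input
#row yields its ID followed by ceiling(log2(ncol-1)) per-scale coefficients;
#the if (FALSE) guard keeps the example from running when the file is sourced.
if (FALSE)
{
    toy = data.frame(id=1:3, matrix(rnorm(3*32), nrow=3))
    curvesToCoeffs(toy, "haar")   #3 rows, 1 + 5 columns
}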