# TODO: consider setRefClass to avoid copying data, see
#   http://stackoverflow.com/questions/2603184/r-pass-by-reference
#   fields: data (can be NULL or provided by user), coeffs (will be computed)
# 'con' can be a character string naming a file; see readLines().
# 'data' can be in DB format, on one column.
#   TODO: guess the format (from header, or column length...)

# epclust: walk through a set of series by chunks of 'nbPerChunk' and hand
# each chunk to getCoeffs(). This is still a skeleton: the coefficients are
# not stored and no clustering task is launched yet.
#
# Arguments:
#   data       matrix-like object, one series per row (chunks are taken over
#              rows); takes precedence over 'con' when both are given.
#   con        connection, or character string naming a file, read
#              incrementally with readLines().
#   raw        currently unused.
#   K          currently unused.
#   nbPerChunk number of series (rows of 'data', or lines of 'con') per chunk;
#              must be a positive whole number.
#   ...        currently unused.
#   where_to_store_tmp_data
#              currently unused. Options considered for temporary files:
#              in RAM, on disk, on DB (can be distributed). How is still TODO.
#
# Returns NULL invisibly. Stops if neither 'data' nor 'con' is provided.
epclust = function(data=NULL, con=NULL, raw=FALSE, K, nbPerChunk, ..., where_to_store_tmp_data)
{
	# on input: can be data or con
	# TODO: data handled by writing it to file (ascii or bin ?!)
	if (is.null(data) && is.null(con))
		stop("at least 'data' or 'con' argument must be present")

	# A chunk size below 1 would make the loops below run forever
	stopifnot(is.numeric(nbPerChunk), length(nbPerChunk) == 1,
		!is.na(nbPerChunk), nbPerChunk >= 1, nbPerChunk == floor(nbPerChunk))

	if (!is.null(data))
	{
		# full data matrix
		n = nrow(data)
		index = 1
		# '<=' so that a chunk starting exactly on the last row is not skipped
		while (index <= n)
		{
			last = min(index + nbPerChunk - 1, n)
			# NOTE(review): the original call was truncated; passing the current
			# block of rows is the presumed intent -- confirm.
			getCoeffs(data[index:last, , drop=FALSE])
			index = index + nbPerChunk
		}
	}
	else
	{
		# incremental connection
		if (is.character(con))
			con = file(con)
		# The connection must stay open across calls: readLines() on a closed
		# connection reopens it each time and would always return the first lines.
		if (!isOpen(con))
		{
			open(con, "r")
			on.exit(close(con), add=TRUE)
		}
		repeat
		{
			chunk = readLines(con, n=nbPerChunk)
			if (length(chunk) == 0)
				break
			# NOTE(review): the line format is not specified yet, so lines are
			# passed unparsed -- confirm what getCoeffs() should receive here.
			getCoeffs(chunk)
			# TODO: then launch a clustering task
		}
	}

	invisible(NULL)
}

# getCoeffs: placeholder for the computation of wavelets coefficients of a
# chunk of series.
#
# Arguments:
#   series  chunk of series, as passed by epclust().
#
# Not implemented yet: currently returns NULL.
getCoeffs = function(series)
{
	# TODO: return wavelets coeffs : compute in parallel !
	NULL
}