From: Benjamin Auder Date: Fri, 6 Jan 2017 03:04:11 +0000 (+0100) Subject: work on draft R package X-Git-Url: https://git.auder.net/images/%7B%7B%20asset%28%27mixstore/css/%7B%7B?a=commitdiff_plain;h=3dcbfeef0dc92444287dd78a16c80e58a98a6ee7;p=epclust.git work on draft R package --- diff --git a/code/draft_R_pkg/R/main.R b/code/draft_R_pkg/R/main.R index 4120b39..3411720 100644 --- a/code/draft_R_pkg/R/main.R +++ b/code/draft_R_pkg/R/main.R @@ -1,26 +1,39 @@ -epclust = function(data=NULL, con=NULL, raw=FALSE, K, nbPerChunk, ...) +#TODO: setRefClass... to avoid copy data !! +#http://stackoverflow.com/questions/2603184/r-pass-by-reference + +#fields: data (can be NULL or provided by user), coeffs (will be computed +#con can be a character string naming a file; see readLines() +#data can be in DB format, on one column : TODO: guess (from header, or col. length...) +epclust = function(data=NULL, con=NULL, raw=FALSE, K, nbPerChunk, ..., where_to_store_tmp_data, and how ?) +#options for tmp files: in RAM, on disk, on DB (can be distributed) { -#TODO: just a wrapper which calls ppam.exe (system("...")) and reads output (binary) file to retrieve medoids + IDs #on input: can be data or con; data handled by writing it to file (ascii or bin ?!), - #con handled - - - #options for tmp files: in RAM, on disk, on DB (can be distributed) - if (!is.null(data)) { #full data matrix - + index = 1 + n = nrow(data) + while (index < n) + { + getCoeffs(data + index = index + nbSeriesPerChunk + } } else if (!is.null(con)) { #incremental connection #read it one by one and get coeffs until nbSeriesPerChunk #then launch a clustering task............ + readLines() } else stop("at least 'data' or 'con' argument must be present") } + +getCoeffs = function(series) +{ + #... return wavelets coeffs : compute in parallel ! +}