Commit | Line | Data |
---|---|---|
3dcbfeef BA |
1 | #TODO: setRefClass... to avoid copying data !! |
2 | #http://stackoverflow.com/questions/2603184/r-pass-by-reference | |
3 | ||
4 | #fields: data (can be NULL or provided by user), coeffs (will be computed) | |
5 | #con can be a character string naming a file; see readLines() | |
6 | #data can be in DB format, on one column : TODO: guess (from header, or col. length...) | |
3d061515 BA |
7 | |
8 | ||
9 | #writeTmp(curves [uncompressed coeffs, limited number - nbSeriesPerChunk], last=FALSE): if last=TRUE, close the connection | |
10 | #readTmp(..., from index, n curves): careful, the connection must remain open | |
11 | #TODO: write read/write tmp reference ( on file in .tmp/ folder ... ) | |
12 | ||
# Entry point of the clustering procedure. Only stage 1 (data acquisition) is
# implemented so far: the input series are turned into coefficients through
# getCoeffs() and handed to writeTmp().
#
# data: where the series come from; one of
#   - a numeric matrix, processed by blocks of nbPerChunk rows
#     (one series per row is assumed, since chunks are counted with nrow());
#   - a function: called as data(nbPerChunk), expected to return the next
#     curves (e.g. read from a DB);
#   - a connection, or a character string naming a file (see readLines()),
#     whose lines are parsed as CSV.
# K: not used yet by this function (presumably the number of clusters - TODO confirm)
# nbPerChunk: positive number of series handled per chunk
# ...: not used yet
# writeTmp: function called with the coefficients of each chunk, to store
#   intermediate results
# readTmp: function meant to retrieve intermediate results (not used yet)
#
# Returns NULL invisibly; stops with an error on an unrecognized 'data' argument.
epclust = function(data=NULL, K, nbPerChunk, ..., writeTmp=ref_writeTmp, readTmp=ref_readTmp)
{
	# A non-positive chunk size would make the chunking loop below run forever
	if (!is.numeric(nbPerChunk) || length(nbPerChunk) != 1 || is.na(nbPerChunk) || nbPerChunk < 1)
		stop("'nbPerChunk' must be a positive number")

	#1) acquire data (process curves, get as coeffs)
	if (is.numeric(data))
	{
		#full data matrix
		if (!is.matrix(data))
			stop("Numeric 'data' must be a matrix (one series per row)")
		n = nrow(data)
		index = 1
		# '<=' so that a chunk starting exactly on the last row is not dropped
		while (index <= n)
		{
			last = min(index + nbPerChunk - 1, n)
			# drop=FALSE: a one-row chunk must remain a matrix
			writeTmp( getCoeffs( data[index:last, , drop=FALSE] ) )
			index = index + nbPerChunk
		}
	} else if (is.function(data))
	{
		#custom user function to retrieve next n curves, probably to read from DB
		#TODO: only one chunk is requested for now
		writeTmp( getCoeffs( data(nbPerChunk) ) )
	} else if (inherits(data, "connection") || is.character(data))
	{
		#incremental connection (or file name)
		#TODO: only the first nbPerChunk lines are read for now;
		#then launch a clustering task
		ascii_lines = readLines(data, nbPerChunk)
		# Parse the lines in memory rather than through an intermediate file
		writeTmp( getCoeffs( utils::read.csv(text=ascii_lines) ) )
	} else
		stop("Unrecognizable 'data' argument (must be numeric, functional or connection)")

	#2) TODO: process coeffs (by chunks of nbPerChunk) and cluster in parallel
	#   (just launch async tasks, wait for them to complete, and re-do if necessary)

	#3) TODO: apply stage 2 (in parallel ? inside task 2) ?)

	invisible(NULL)
}
3dcbfeef BA |
52 | |
# Placeholder for the computation of the wavelet coefficients of 'series'
# (planned to run in parallel). Nothing is computed yet: 'series' is ignored
# and NULL is returned.
getCoeffs = function(series)
{
	#TODO: return wavelets coeffs : compute in parallel !
	NULL
}