complete first draft of package
[epclust.git] / epclust / R / utils.R
index 8f7da38..40b0a18 100644 (file)
@@ -10,35 +10,66 @@ toInteger <- function(x, condition)
        x
 }
 
-#TODO: merge these 2 next ?!
-serialize = function(coeffs)
+curvesToCoeffs = function(series, wf)
 {
-       #.........
-       #C function (from data.frame, type of IDs ??! force integers ? [yes])
-       #return raw vector
-}
-appendBinary = function(.......)
-{
-       #take raw vector, append it (binary mode) to a file
+       L = length(series[1,])
+       D = ceiling( log2(L) )
+       nb_sample_points = 2^D
+       apply(series, 1, function(x) {
+               interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
+               W = wavelets::dwt(interpolated_curve, filter=wf, D)@W
+               rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
+       })
 }
 
-#finalizeSerialization = function(...)
-#{
-#      #write number of series, and length of each...
-#}
-
-deserialize = function(coeffs, range)
+#data: matrix of doubles (one series per row), or a connection to comma-separated text (one series per line)
+serialize = function(data, file, type, nb_per_chunk)
 {
-       #......
-       #C function (from file name)
+       bin_data = file(file, "ab")
+       #on the first call (file still empty), write the data length as a header
+       nbytes = ifelse(type=="double",8,4)
+       first_write = FALSE
+       if (file.info(file)$size == 0)
+       {
+               #number of items per series: always stored on 8 bytes
+               writeBin(0L, bin_data, size=8) #,endian="little")
+               first_write = TRUE
+       }
+       if (is.matrix(data))
+       {
+               writeBin(t(data), bin_data, size=nbytes)
+               data_length = ncol(data)
+       }
+       else #if (is(data, "connection"))
+       {
+               if (first_write)
+               {
+                       data_line = scan(data, double(), sep=",", nlines=1)
+                       writeBin(data_line, bin_data, size=nbytes)
+                       data_length = length(data_line)
+               }
+               repeat
+               {
+                       data_chunk = scan(data, double(), sep=",", nlines=nb_per_chunk)
+                       if (length(data_chunk)==0)
+                               break
+                       writeBin(data_chunk, bin_data, size=nbytes)
+               }
+       }
+       if (first_write)
+       {
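+               #write (file_size-1) / (nbytes*nbWritten) at offset 0 in bin_data ! ignored == file_size. NOTE(review): bin_data is opened in append mode ("ab"); some platforms only write at end of file in that mode, so confirm this lands at offset 0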
+               ignored = seek(bin_data, 0)
+               writeBin(data_length, bin_data, size=8)
+       }
+       close(bin_data)
 }
 
-getSeries(data, rank=NULL, id=NULL)
+#TODO: read from the binary file (always the same structure)
+getDataFromFile(indices, file, type)
 {
-       #TODO:
-}
-
-getCoeffs(.....) #FROM BINARY FILE !!!
-{
-
+       bin_data = file(file, "rb")
+       nbytes = ifelse(type=="double",8,4)
+       data_length = readBin(bin_data,"double",1,nbytes) #,endian="little")
+       t(sapply(indices, function(i) readBin(bin_data,"double",n=data_length,size=nbytes)))
 }