- data_bin = file(data_bin_file, "rb")
- data_size = file.info(data_bin)$size
- data_length = readBin(data_bin, "integer", 1, 8, endian)
- #Ou t(sapply(...)) (+ rapide ?)
- data_ascii = do.call( rbind, lapply( indices, function(i) {
- offset = 8+(i-1)*data_length*nbytes
- if (offset > data_size)
- return (vector("double",0))
- ignored = seek(data_bin, offset)
- readBin(data_bin, "double", n=data_length, size=nbytes)
+ nb_items <- 0 #side-effect: store the number of transformed items
+ index <- 1
+ repeat
+ {
+ # Retrieve a chunk of data in a binary file (generally obtained by binarize())
+ data_chunk <- getData((index-1)+seq_len(nb_per_chunk))
+ if (is.null(data_chunk))
+ break
+
+ # Apply transformation on the current chunk (by columns)
+ transformed_chunk <- transform(data_chunk)
+
+ # Save the result in binary format
+ binarize(transformed_chunk, data_bin_file, nb_per_chunk, ",", nbytes, endian)
+
+ index <- index + nb_per_chunk
+ nb_items <- nb_items + ncol(data_chunk)
+ }
+ nb_items #number of transformed items
+}
+
+#' @rdname de_serialize
+#' @export
+getDataInFile <- function(indices, data_bin_file, nbytes=4, endian=.Platform$endian)
+{
+ data_bin <- file(data_bin_file, "rb") #source binary file
+
+ data_size <- file.info(data_bin_file)$size #number of bytes in the file
+ # data_length: length of a vector in the binary file (first element, 8 bytes)
+ data_length <- readBin(data_bin, "integer", n=1, size=8, endian=endian)
+
+ # Seek all 'indices' columns in the binary file, using data_length and nbytes
+ # to compute the offset ( 1-based index i starts at byte 8 + (i-1)*data_length*nbytes )
+ data_ascii <- do.call( cbind, lapply( indices, function(i) {
+ offset <- 8+(i-1)*data_length*nbytes
+ if (offset >= data_size)
+ return (NULL)
+ ignored <- seek(data_bin, offset) #position cursor at computed offset
+ readBin(data_bin, "double", n=data_length, size=nbytes, endian=endian)