Commit | Line | Data |
---|---|---|
4bcfdbee BA |
1 | #' @name de_serialize |
2 | #' @rdname de_serialize | |
3 | #' @aliases binarize getDataInFile | |
4 | #' | |
5 | #' @title (De)Serialization of a matrix | |
6 | #' | |
7 | #' @description \code{binarize()} serializes a matrix or CSV file with minimal overhead, | |
8 | #' into a binary file. \code{getDataInFile()} achieves the inverse task: it retrieves | |
9 | #' (ASCII) data rows from indices in the binary file | |
10 | #' | |
11 | #' @param data_ascii Either a matrix or CSV file, with items in rows | |
12 | #' @param indices Indices of the lines to retrieve | |
13 | #' @param data_bin_file Name of binary file on output (\code{binarize}) | |
14 | #' or input (\code{getDataInFile}) | |
15 | #' @param nb_per_chunk Number of lines to process in one batch | |
16 | #' @inheritParams claws | |
17 | #' | |
18 | #' @return For \code{getDataInFile()}, the matrix with rows corresponding to the | |
19 | #' requested indices | |
20 | NULL | |
21 | ||
22 | #' @rdname de_serialize | |
23 | #' @export | |
binarize = function(data_ascii, data_bin_file, nb_per_chunk,
  sep=",", nbytes=4, endian=.Platform$endian)
{
  # Serialize 'data_ascii' (a matrix, the name of a CSV file, or a connection to CSV
  # data) into the binary file 'data_bin_file'.
  #
  # File layout: an 8-byte header holding the number of values per row, followed by
  # the rows written one after the other, each value stored on 'nbytes' bytes.
  # If 'data_bin_file' already exists and is not empty, rows are appended and the
  # header is left untouched.
  #
  # Returns NULL invisibly; the function is called for its side effect on disk.

  if (is.character(data_ascii))
    data_ascii = file(data_ascii, open="r")
  else if (inherits(data_ascii, "connection") && !isOpen(data_ascii))
    open(data_ascii)
  # The input connection is closed on exit (as before), now also when an error occurs
  if (inherits(data_ascii, "connection"))
    on.exit(close(data_ascii), add=TRUE)

  first_write = (!file.exists(data_bin_file) || file.info(data_bin_file)$size == 0)
  data_bin = file(data_bin_file, open=if (first_write) "wb" else "ab")
  on.exit(close(data_bin), add=TRUE)

  # Write the header on first call
  if (first_write)
  {
    # Placeholder for the row length, always on 8 bytes; overwritten at the end
    writeBin(0L, data_bin, size=8, endian=endian)
    if (is.matrix(data_ascii))
      data_length = ncol(data_ascii)
    else
    {
      # The first CSV line gives the row length; it is data too, so write it right away
      data_line = scan(data_ascii, double(), sep=sep, nlines=1, quiet=TRUE)
      writeBin(data_line, data_bin, size=nbytes, endian=endian)
      data_length = length(data_line)
    }
  }

  if (is.matrix(data_ascii))
  {
    # A chunk size below 1 would never advance through the rows
    if (!is.numeric(nb_per_chunk) || length(nb_per_chunk) != 1
      || is.na(nb_per_chunk) || nb_per_chunk < 1)
    {
      stop("'nb_per_chunk' must be a positive number", call.=FALSE)
    }
    nb_rows = nrow(data_ascii)
    index = 1
    while (index <= nb_rows)
    {
      # Rows index..last, i.e. at most nb_per_chunk rows and no overlap between chunks
      last = min(nb_rows, index + nb_per_chunk - 1)
      # Transpose so that values are laid out row by row; drop=FALSE keeps the
      # matrix shape for single-row or single-column chunks
      data_chunk = as.double(t(data_ascii[index:last, , drop=FALSE]))
      writeBin(data_chunk, data_bin, size=nbytes, endian=endian)
      index = last + 1
    }
  }
  else
  {
    repeat
    {
      data_chunk = scan(data_ascii, double(), sep=sep, nlines=nb_per_chunk, quiet=TRUE)
      if (length(data_chunk) == 0)
        break
      writeBin(data_chunk, data_bin, size=nbytes, endian=endian)
    }
  }

  if (first_write)
  {
    # Go back to the start of the file and replace the placeholder by the row length
    ignored = seek(data_bin, 0, rw="write")
    writeBin(data_length, data_bin, size=8, endian=endian)
  }

  invisible(NULL)
}
82 | ||
4bcfdbee BA |
83 | #' @rdname de_serialize |
84 | #' @export | |
56857861 BA |
getDataInFile = function(indices, data_bin_file, nbytes=4, endian=.Platform$endian)
{
  # Read the rows at positions 'indices' (1-based) from the binary file 'data_bin_file'.
  #
  # The file is expected to start with an 8-byte integer giving the number of values
  # per row, followed by the rows, each value stored on 'nbytes' bytes.
  #
  # Returns a matrix with one row per index found in the file, or NULL if none of the
  # requested rows is available. Indices outside the file are skipped.

  data_bin = file(data_bin_file, "rb")
  # Make sure the connection is released even if reading fails
  on.exit(close(data_bin), add=TRUE)

  data_size = file.info(data_bin_file)$size
  data_length = readBin(data_bin, "integer", n=1, size=8, endian=endian)
  # Empty file (no header) or empty rows: nothing to retrieve
  if (length(data_length) == 0 || is.na(data_length) || data_length <= 0)
    return (NULL)

  row_bytes = data_length * nbytes
  rows = lapply(indices, function(i) {
    offset = 8 + (i-1) * row_bytes
    # Skip rows which are not entirely inside the file (also rejects indices < 1,
    # which would point into the header or before the start of the file)
    if (i < 1 || offset + row_bytes > data_size)
      return (vector("double",0))
    ignored = seek(data_bin, offset)
    readBin(data_bin, "double", n=data_length, size=nbytes, endian=endian)
  })

  # rbind() ignores zero-length vectors unless all of them are; NULL if 'rows' is empty
  data_ascii = do.call(rbind, rows)
  if (!is.null(data_ascii) && ncol(data_ascii) > 0) data_ascii else NULL
}
56857861 | 100 | } |