| 1 | % Generated by roxygen2: do not edit by hand |
| 2 | % Please edit documentation in R/de_serialize.R |
| 3 | \name{de_serialize} |
| 4 | \alias{de_serialize} |
| 5 | \alias{binarize} |
| 6 | \alias{binarizeTransform} |
| 7 | \alias{getDataInFile} |
| 8 | \alias{binarize} |
| 9 | \alias{binarizeTransform} |
| 10 | \alias{getDataInFile} |
| 11 | \title{(De)Serialization of a [big]matrix or data stream} |
| 12 | \usage{ |
| 13 | binarize(data_ascii, data_bin_file, nb_per_chunk, sep = ",", nbytes = 4, |
| 14 | endian = .Platform$endian) |
| 15 | |
| 16 | binarizeTransform(getData, transform, data_bin_file, nb_per_chunk, nbytes = 4, |
| 17 | endian = .Platform$endian) |
| 18 | |
| 19 | getDataInFile(indices, data_bin_file, nbytes = 4, endian = .Platform$endian) |
| 20 | } |
| 21 | \arguments{ |
| 22 | \item{data_ascii}{Either a matrix or CSV file, with items in rows} |
| 23 | |
| 24 | \item{data_bin_file}{Name of binary file on output (\code{binarize}) |
| 25 | or input (\code{getDataInFile})} |
| 26 | |
| 27 | \item{nb_per_chunk}{Number of lines to process in one batch} |
| 28 | |
| 29 | \item{sep}{Separator in CSV input file (if any provided)} |
| 30 | |
| 31 | \item{nbytes}{Number of bytes to serialize a floating-point number; 4 or 8} |
| 32 | |
| 33 | \item{endian}{Endianness to use for (de)serialization. Use "little" or "big" for portability} |
| 34 | |
| 35 | \item{getData}{Function to retrieve data chunks} |
| 36 | |
| 37 | \item{transform}{Transformation function to apply on data chunks} |
| 38 | |
| 39 | \item{indices}{Indices of the lines to retrieve} |
| 40 | } |
| 41 | \value{ |
| 42 | For \code{getDataInFile()}, the matrix with rows corresponding to the |
| 43 | requested indices. \code{binarizeTransform} returns the number of processed lines. |
| 44 | \code{binarize} is designed to serialize in several calls, thus returns nothing. |
| 45 | } |
| 46 | \description{ |
| 47 | \code{binarize()} serializes a matrix or CSV file with minimal overhead, |
| 48 | into a binary file. \code{getDataInFile()} achieves the inverse task: it retrieves |
| 49 | (ASCII) data rows from indices in the binary file. Finally, |
| 50 | \code{binarizeTransform()} serializes transformations of all data chunks; to use it, |
| 51 | a data-retrieval function must be provided, thus \code{binarize} will most likely be |
| 52 | used first (and then a function defined to seek in the generated binary file). |
| 53 | } |