8bb378de326d11ef4df4b7c260f4e9a869e6b51e
[epclust.git] / pkg / man / de_serialize.Rd
1 % Generated by roxygen2: do not edit by hand
2 % Please edit documentation in R/de_serialize.R
3 \name{de_serialize}
4 \alias{de_serialize}
5 \alias{binarize}
6 \alias{binarizeTransform}
7 \alias{getDataInFile}
8 \alias{binarize}
9 \alias{binarizeTransform}
10 \alias{getDataInFile}
11 \title{(De)Serialization of a [big]matrix or data stream}
12 \usage{
13 binarize(data_ascii, data_bin_file, nb_per_chunk, sep = ",", nbytes = 4,
14 endian = .Platform$endian)
15
16 binarizeTransform(getData, transform, data_bin_file, nb_per_chunk, nbytes = 4,
17 endian = .Platform$endian)
18
19 getDataInFile(indices, data_bin_file, nbytes = 4, endian = .Platform$endian)
20 }
21 \arguments{
22 \item{data_ascii}{Either a matrix or CSV file, with items in rows}
23
24 \item{data_bin_file}{Name of binary file on output (\code{binarize})
25 or input (\code{getDataInFile})}
26
27 \item{nb_per_chunk}{Number of lines to process in one batch}
28
29 \item{sep}{Separator in CSV input file (if any provided)}
30
31 \item{nbytes}{Number of bytes to serialize a floating-point number; 4 or 8}
32
33 \item{endian}{Endianness to use for (de)serialization. Use "little" or "big" for portability}
34
35 \item{getData}{Function to retrieve data chunks}
36
37 \item{transform}{Transformation function to apply on data chunks}
38
39 \item{indices}{Indices of the lines to retrieve}
40 }
41 \value{
42 For \code{getDataInFile()}, the matrix with rows corresponding to the
43 requested indices. \code{binarizeTransform} returns the number of processed lines.
44 \code{binarize} is designed to serialize in several calls, thus returns nothing.
45 }
46 \description{
47 \code{binarize()} serializes a matrix or CSV file with minimal overhead,
48 into a binary file. \code{getDataInFile()} achieves the inverse task: it retrieves
49 (ASCII) data rows from indices in the binary file. Finally,
50 \code{binarizeTransform()} serializes transformations of all data chunks; to use it,
51 a data-retrieval function must be provided, thus \code{binarize} will most likely be
52 used first (and then a function defined to seek in the generated binary file).
53 }