X-Git-Url: https://git.auder.net/?a=blobdiff_plain;ds=sidebyside;f=pkg%2Fman%2Fde_serialize.Rd;fp=pkg%2Fman%2Fde_serialize.Rd;h=8bb378de326d11ef4df4b7c260f4e9a869e6b51e;hb=e906736ea27105237e84c904dce6170353726292;hp=0000000000000000000000000000000000000000;hpb=57f337af19cd6251815bb1ff2d62f4c58e8b6078;p=epclust.git diff --git a/pkg/man/de_serialize.Rd b/pkg/man/de_serialize.Rd new file mode 100644 index 0000000..8bb378d --- /dev/null +++ b/pkg/man/de_serialize.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/de_serialize.R +\name{de_serialize} +\alias{de_serialize} +\alias{binarize} +\alias{binarizeTransform} +\alias{getDataInFile} +\alias{binarize} +\alias{binarizeTransform} +\alias{getDataInFile} +\title{(De)Serialization of a [big]matrix or data stream} +\usage{ +binarize(data_ascii, data_bin_file, nb_per_chunk, sep = ",", nbytes = 4, + endian = .Platform$endian) + +binarizeTransform(getData, transform, data_bin_file, nb_per_chunk, nbytes = 4, + endian = .Platform$endian) + +getDataInFile(indices, data_bin_file, nbytes = 4, endian = .Platform$endian) +} +\arguments{ +\item{data_ascii}{Either a matrix or CSV file, with items in rows} + +\item{data_bin_file}{Name of binary file on output (\code{binarize}) +or input (\code{getDataInFile})} + +\item{nb_per_chunk}{Number of lines to process in one batch} + +\item{sep}{Separator in CSV input file (if any provided)} + +\item{nbytes}{Number of bytes to serialize a floating-point number; 4 or 8} + +\item{endian}{Endianness to use for (de)serialization. Use "little" or "big" for portability} + +\item{getData}{Function to retrieve data chunks} + +\item{transform}{Transformation function to apply on data chunks} + +\item{indices}{Indices of the lines to retrieve} +} +\value{ +For \code{getDataInFile()}, the matrix with rows corresponding to the + requested indices. \code{binarizeTransform} returns the number of processed lines. 
+ \code{binarize} is designed to serialize in several calls, thus returns nothing. +} +\description{ +\code{binarize()} serializes a matrix or CSV file with minimal overhead, + into a binary file. \code{getDataInFile()} achieves the inverse task: it retrieves + (ASCII) data rows from indices in the binary file. Finally, + \code{binarizeTransform()} serializes transformations of all data chunks; to use it, + a data-retrieval function must be provided, thus \code{binarize} will most likely be + used first (and then a function defined to seek in the generated binary file) +}