-#fields: data (can be NULL or provided by user), coeffs (will be computed
-#con can be a character string naming a file; see readLines()
-#data can be in DB format, on one column : TODO: guess (from header, or col. length...)
-
-
-writeTmp(curves [uncompressed coeffs, limited number - nbSeriesPerChunk], last=FALSE) #if last=TRUE, close the conn
-readTmp(..., from index, n curves) #careful: connection must remain open
-#TODO: write read/write tmp reference ( on file in .tmp/ folder ... )
-
-#data:
-#stop("Unrecognizable 'data' argument (must be numeric, functional or connection)")
-
-#WER: "end" to apply stage 2 after stage 1 iterated, "mix" (or anything else...?!) to apply it after every stage 1
-epclust = function(data, K, nbPerChunk, WER="end", ncores=NULL, writeTmp=ref_writeTmp, readTmp=ref_readTmp) #where to put/retrieve intermediate results; if not provided, use file on disk
+#' @title Cluster power curves with PAM in parallel
+#'
+#' @description Groups electricity power curves (or any series of similar nature) by applying
+#' the PAM algorithm in parallel to chunks of size \code{nbSeriesPerChunk}
+#'
+#' @param data Access to the data, which can be of one of the three following types:
+#' \itemize{
+#' \item data.frame: each line contains its ID in the first cell, followed by all its values
+#' \item connection: any R connection object (e.g. a file) providing lines as described above
+#' \item function: a custom way to retrieve the curves; it has two arguments: the start index
+#' (start) and number of curves (n); see example in package vignette.
+#' }
+#' @param K Number of clusters
+#' @param nbSeriesPerChunk Number of series in each group
+#' @param writeTmp Function to write temporary wavelet coefficients (+ identifiers);
+#' see defaults in defaults.R
+#' @param readTmp Function to read temporary wavelet coefficients (see defaults.R)
+#' @param WER "end" to apply stage 2 after stage 1 has iterated and finished, or "mix"
+#' to apply it after every stage 1
+#' @param ncores Number of parallel processes; if NULL, use parallel::detectCores()
+#'
+#' @return A data.frame of the final medoid curves (identifiers + values)
+epclust = function(data, K, nbSeriesPerChunk, writeTmp=ref_writeTmp, readTmp=ref_readTmp,
+ WER="end", ncores=NULL)