X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Fmain.R;h=1347fae27a623a43cf9db53a74580c85690f5313;hp=5e47f192e9bb95373f4b8ac92428d7e13d5872e9;hb=4efef8ccd1522278f53aa5ce265f3a6cfb6fbd9f;hpb=31daec0264f0ebf5e97a62c82af5e884d7dc1c43 diff --git a/epclust/R/main.R b/epclust/R/main.R index 5e47f19..1347fae 100644 --- a/epclust/R/main.R +++ b/epclust/R/main.R @@ -32,20 +32,74 @@ NULL #' @param nbytes Number of bytes to serialize a floating-point number; 4 or 8 #' @param endian Endianness to use for (de)serialization. Use "little" or "big" for portability #' -#' @return A matrix of the final medoids curves +#' @return A matrix of the final medoids curves (K2) in rows #' #' @examples -#' getData = function(start, n) { -#' con = dbConnect(drv = RSQLite::SQLite(), dbname = "mydata.sqlite") -#' df = dbGetQuery(con, paste( -#' "SELECT * FROM times_values GROUP BY id OFFSET ",start, +#' \dontrun{ +#' # WER distances computations are a bit too long for CRAN (for now) +#' +#' # Random series around cos(x,2x,3x)/sin(x,2x,3x) +#' x = seq(0,500,0.05) +#' L = length(x) #10001 +#' ref_series = matrix( c(cos(x), cos(2*x), cos(3*x), sin(x), sin(2*x), sin(3*x)), +#' byrows=TRUE, ncol=L ) +#' library(wmtsa) +#' series = do.call( rbind, lapply( 1:6, function(i) +#' do.call(rbind, wmtsa::wavBootstrap(ref_series[i,], n.realization=400)) ) ) +#' #dim(series) #c(2400,10001) +#' medoids_ascii = claws(series_RData, K1=60, K2=6, wf="d8", nb_series_per_chunk=500) +#' +#' # Same example, from CSV file +#' csv_file = "/tmp/epclust_series.csv" +#' write.table(series, csv_file, sep=",", row.names=FALSE, col.names=FALSE) +#' medoids_csv = claws(csv_file, K1=60, K2=6, wf="d8", nb_series_per_chunk=500) +#' +#' # Same example, from binary file +#' bin_file = "/tmp/epclust_series.bin" +#' nbytes = 8 +#' endian = "little" +#' epclust::serialize(csv_file, bin_file, 500, nbytes, endian) +#' getSeries = function(indices) getDataInFile(indices, bin_file, nbytes, endian) +#' medoids_bin = claws(getSeries, K1=60, K2=6, wf="d8", nb_series_per_chunk=500) +#' unlink(csv_file) +#' unlink(bin_file) +#' +#' # Same example, from SQLite database +#' library(DBI) +#' series_db <- dbConnect(RSQLite::SQLite(), "file::memory:") +#' # Prepare data.frame in DB-format +#' n = nrow(series) +#' formatted_series = data.frame( +#' ID = rep(1:n,each=L), +#' time = as.POSIXct(1800*(0:n),"GMT",origin="2001-01-01"), +#' value + + + + +#' TODO + + +#' times_values = as.data.frame(series) +#' dbWriteTable(series_db, "times_values", times_values) +#' # NOTE: assume that DB internal data is not reorganized when computing coefficients +#' indexToID_inDB <<- list() +#' getSeries = function(indices) { +#' con = dbConnect(drv = RSQLite::SQLite(), dbname = db_file) +#' if (indices %in% indexToID_inDB) +#' { +#' df = dbGetQuery(con, paste( +#' "SELECT value FROM times_values GROUP BY id OFFSET ",start, #' "LIMIT ", n, " ORDER BY date", sep="")) -#' return (df) +#' return (df) +#' } +#' else +#' { +#' ... +#' } +#' } +#' dbDisconnect(mydb) #' } -#' #####TODO: if DB, array rank --> ID at first retrieval, when computing coeffs; so:: NO use of IDs ! -#' #TODO: 3 examples, data.frame / binary file / DB sqLite -#' + sampleCurves : wavBootstrap de package wmtsa -#' cl = epclust(getData, K1=200, K2=15, ntasks=1000, nb_series_per_chunk=5000, WER="mix") #' @export claws = function(getSeries, K1, K2, random=TRUE, #randomize series order? @@ -121,6 +175,7 @@ claws = function(getSeries, K1, K2, cl = parallel::makeCluster(ncores_tasks) # 1000*K1 indices [if WER=="end"], or empty vector [if WER=="mix"] --> series on file indices = unlist( parallel::parLapply(cl, indices_tasks, function(inds) { + require("epclust", quietly=TRUE) indices_medoids = clusteringTask(inds,getCoefs,K1,nb_series_per_chunk,ncores_clust) if (WER=="mix") {