X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Futils.R;h=1e4ea30f9a8297031724386e168bfd5083902602;hp=e79c00943bc89febca3e2018dbb7e5a33890f2df;hb=282342bafdc9ff65c5df98c6e2304d63b33b9fb2;hpb=3c5a4b0880db63367a474a568e1322b3999932fe diff --git a/epclust/R/utils.R b/epclust/R/utils.R index e79c009..1e4ea30 100644 --- a/epclust/R/utils.R +++ b/epclust/R/utils.R @@ -4,8 +4,8 @@ errWarn <- function(ignored) paste("Cannot convert argument' ",substitute(x),"' to integer", sep="") if (!is.integer(x)) - tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()}, - warning = errWarn, error = errWarn) + tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()}, + warning=errWarn, error=errWarn) if (!condition(x)) { stop(paste("Argument '",substitute(x), @@ -20,8 +20,8 @@ errWarn <- function(ignored) paste("Cannot convert argument' ",substitute(x),"' to logical", sep="") if (!is.logical(x)) - tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()}, - warning = errWarn, error = errWarn) + tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()}, + warning=errWarn, error=errWarn) x } @@ -36,42 +36,43 @@ #' @return A matrix of size log(L) x n containing contributions in columns #' #' @export -curvesToContribs = function(series, wav_filt, contrib_type) +curvesToContribs <- function(series, wav_filt, contrib_type) { - L = nrow(series) - D = ceiling( log2(L) ) + series <- as.matrix(series) + L <- nrow(series) + D <- ceiling( log2(L) ) # Series are interpolated to all have length 2^D - nb_sample_points = 2^D + nb_sample_points <- 2^D apply(series, 2, function(x) { - interpolated_curve = spline(1:L, x, n=nb_sample_points)$y - W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W + interpolated_curve <- spline(1:L, x, n=nb_sample_points)$y + W <- wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W # Compute the sum of squared discrete wavelet coefficients, for each scale - nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) + nrj <- rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) ) if (contrib_type!="absolute") - nrj = nrj / sum(nrj) + nrj <- nrj / sum(nrj) if (contrib_type=="logit") - nrj = - log(1 - nrj) - nrj + nrj <- - log(1 - nrj) + unname( nrj ) }) } # Helper function to divide indices into balanced sets. # Ensure that all indices sets have at least min_size elements. -.splitIndices = function(indices, nb_per_set, min_size=1) +.splitIndices <- function(indices, nb_per_set, min_size=1) { - L = length(indices) - nb_workers = floor( L / nb_per_set ) - rem = L %% nb_per_set + L <- length(indices) + nb_workers <- floor( L / nb_per_set ) + rem <- L %% nb_per_set if (nb_workers == 0 || (nb_workers==1 && rem==0)) { # L <= nb_per_set, simple case return (list(indices)) } - indices_workers = lapply( seq_len(nb_workers), function(i) + indices_workers <- lapply( seq_len(nb_workers), function(i) indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] ) - rem = L %% nb_per_set #number of remaining unassigned items + rem <- L %% nb_per_set #number of remaining unassigned items if (rem == 0) return (indices_workers) @@ -81,18 +82,36 @@ curvesToContribs = function(series, wav_filt, contrib_type) # get lower min_size (failure). while (length(rem) < min_size) { - index = length(rem) %% nb_workers + 1 + index <- length(rem) %% nb_workers + 1 if (length(indices_workers[[index]]) <= min_size) { stop("Impossible to split indices properly for clustering. Try increasing nb_items_clust or decreasing K1") } - rem = c(rem, tail(indices_workers[[index]],1)) - indices_workers[[index]] = head( indices_workers[[index]], -1) + rem <- c(rem, tail(indices_workers[[index]],1)) + indices_workers[[index]] <- head( indices_workers[[index]], -1) } return ( c(indices_workers, list(rem) ) ) } +#' assignMedoids +#' +#' Find the closest medoid for each curve in input (by-columns matrix) +#' +#' @param curves (Chunk) of series whose medoids indices must be found +#' @param medoids Matrix of medoids (in columns) +#' +#' @return The vector of integer assignments +#' @export +assignMedoids <- function(curves, medoids) +{ + nb_series <- ncol(curves) + mi <- rep(NA,nb_series) + for (i in seq_len(nb_series)) + mi[i] <- which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) ) + mi +} + #' filterMA #' #' Filter [time-]series by replacing all values by the moving average of values @@ -103,7 +122,7 @@ curvesToContribs = function(series, wav_filt, contrib_type) #' #' @return The filtered matrix (in columns), of same size as the input #' @export -filterMA = function(M_, w_) +filterMA <- function(M_, w_) .Call("filterMA", M_, w_, PACKAGE="epclust") #' cleanBin @@ -114,7 +133,7 @@ filterMA = function(M_, w_) #' @export cleanBin <- function() { - bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE) + bin_files <- list.files(pattern="*.epclust.bin", all.files=TRUE) for (file in bin_files) unlink(file) }