X-Git-Url: https://git.auder.net/?p=epclust.git;a=blobdiff_plain;f=epclust%2FR%2Futils.R;h=1e4ea30f9a8297031724386e168bfd5083902602;hp=e79c00943bc89febca3e2018dbb7e5a33890f2df;hb=282342bafdc9ff65c5df98c6e2304d63b33b9fb2;hpb=3c5a4b0880db63367a474a568e1322b3999932fe

diff --git a/epclust/R/utils.R b/epclust/R/utils.R
index e79c009..1e4ea30 100644
--- a/epclust/R/utils.R
+++ b/epclust/R/utils.R
@@ -4,8 +4,8 @@
 	errWarn <- function(ignored)
 		paste("Cannot convert argument' ",substitute(x),"' to integer", sep="")
 	if (!is.integer(x))
-		tryCatch({x = as.integer(x)[1]; if (is.na(x)) stop()},
-			warning = errWarn, error = errWarn)
+		tryCatch({x <- as.integer(x)[1]; if (is.na(x)) stop()},
+			warning=errWarn, error=errWarn)
 	if (!condition(x))
 	{
 		stop(paste("Argument '",substitute(x),
@@ -20,8 +20,8 @@
 	errWarn <- function(ignored)
 		paste("Cannot convert argument' ",substitute(x),"' to logical", sep="")
 	if (!is.logical(x))
-		tryCatch({x = as.logical(x)[1]; if (is.na(x)) stop()},
-			warning = errWarn, error = errWarn)
+		tryCatch({x <- as.logical(x)[1]; if (is.na(x)) stop()},
+			warning=errWarn, error=errWarn)
 	x
 }
 
@@ -36,42 +36,43 @@
 #' @return A matrix of size log(L) x n containing contributions in columns
 #'
 #' @export
-curvesToContribs = function(series, wav_filt, contrib_type)
+curvesToContribs <- function(series, wav_filt, contrib_type)
 {
-	L = nrow(series)
-	D = ceiling( log2(L) )
+	series <- as.matrix(series) # nrow() and apply() below need a matrix
+	L <- nrow(series)
+	D <- ceiling( log2(L) )
 	# Series are interpolated to all have length 2^D
-	nb_sample_points = 2^D
+	nb_sample_points <- 2^D
 	apply(series, 2, function(x) {
-		interpolated_curve = spline(1:L, x, n=nb_sample_points)$y
-		W = wavelets::dwt(interpolated_curve, filter=wav_filt, D)@W
+		interpolated_curve <- spline(seq_len(L), x, n=nb_sample_points)$y
+		W <- wavelets::dwt(interpolated_curve, filter=wav_filt, n.levels=D)@W
 		# Compute the sum of squared discrete wavelet coefficients, for each scale
-		nrj = rev( sapply( W, function(v) ( sqrt( sum(v^2) ) ) ) )
+		nrj <- rev( vapply( W, function(v) sqrt( sum(v^2) ), numeric(1) ) )
 		if (contrib_type!="absolute")
-			nrj = nrj / sum(nrj)
+			nrj <- nrj / sum(nrj)
 		if (contrib_type=="logit")
-			nrj = - log(1 - nrj)
-		nrj
+			nrj <- - log(1 - nrj)
+		unname( nrj ) # drop any names carried over from W
 	})
 }
 
 # Helper function to divide indices into balanced sets.
 # Ensure that all indices sets have at least min_size elements.
-.splitIndices = function(indices, nb_per_set, min_size=1)
+.splitIndices <- function(indices, nb_per_set, min_size=1)
 {
-	L = length(indices)
-	nb_workers = floor( L / nb_per_set )
-	rem = L %% nb_per_set
+	L <- length(indices)
+	nb_workers <- floor( L / nb_per_set )
+	rem <- L %% nb_per_set
 	if (nb_workers == 0 || (nb_workers==1 && rem==0))
 	{
 		# L <= nb_per_set, simple case
 		return (list(indices))
 	}
 
-	indices_workers = lapply( seq_len(nb_workers), function(i)
+	indices_workers <- lapply( seq_len(nb_workers), function(i)
 		indices[(nb_per_set*(i-1)+1):(nb_per_set*i)] )
 
-	rem = L %% nb_per_set #number of remaining unassigned items
+	rem <- L %% nb_per_set #number of remaining unassigned items
 	if (rem == 0)
 		return (indices_workers)
 
@@ -81,18 +82,36 @@ curvesToContribs = function(series, wav_filt, contrib_type)
 	# get lower min_size (failure).
 	while (length(rem) < min_size)
 	{
-		index = length(rem) %% nb_workers + 1
+		index <- length(rem) %% nb_workers + 1
 		if (length(indices_workers[[index]]) <= min_size)
 		{
 			stop("Impossible to split indices properly for clustering.
 				Try increasing nb_items_clust or decreasing K1")
 		}
-		rem = c(rem, tail(indices_workers[[index]],1))
-		indices_workers[[index]] = head( indices_workers[[index]], -1)
+		rem <- c(rem, tail(indices_workers[[index]],1))
+		indices_workers[[index]] <- head( indices_workers[[index]], -1)
 	}
 	return ( c(indices_workers, list(rem) ) )
 }
 
+#' assignMedoids
+#'
+#' Find the closest medoid for each curve in input (by-columns matrix)
+#'
+#' @param curves (Chunk) of series whose medoids indices must be found
+#' @param medoids Matrix of medoids (in columns)
+#'
+#' @return The vector of integer assignments
+#' @export
+assignMedoids <- function(curves, medoids)
+{
+	# For each curve (column), return the index of the medoid (column) at the
+	# smallest squared Euclidean distance; ties resolve to the first minimum.
+	# vapply() guarantees an integer vector, even when there are no curves.
+	vapply( seq_len(ncol(curves)), function(i)
+		which.min( colSums( sweep(medoids, 1, curves[,i], '-')^2 ) ), integer(1) )
+}
+
 #' filterMA
 #'
 #' Filter [time-]series by replacing all values by the moving average of values
@@ -103,7 +122,7 @@ curvesToContribs = function(series, wav_filt, contrib_type)
 #'
 #' @return The filtered matrix (in columns), of same size as the input
 #' @export
-filterMA = function(M_, w_)
+filterMA <- function(M_, w_) # thin R wrapper: forwards both arguments to the native "filterMA" routine
 	.Call("filterMA", M_, w_, PACKAGE="epclust")
 
 #' cleanBin
@@ -114,7 +133,7 @@ filterMA = function(M_, w_)
 #' @export
 cleanBin <- function()
 {
-	bin_files = list.files(pattern = "*.epclust.bin", all.files=TRUE)
+	bin_files <- list.files(pattern="\\.epclust\\.bin$", all.files=TRUE) # pattern is a regex, not a glob: escape dots, anchor the suffix
 	for (file in bin_files)
 		unlink(file)
 }