[epclust.git] / epclust / R / clustering.R

#' @name clustering
#' @rdname clustering
#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
#'
#' @title Two-stage clustering, within one task (see \code{claws()})
#'
#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
#'   iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
#'   through discrete wavelets coefficients).
#'   \code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones
#'   and then WER distances computations, before applying the clustering algorithm.
#'   \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
#'   clustering procedures respectively for stage 1 and 2. The former applies the
#'   first clustering algorithm on a contributions matrix, while the latter clusters
#'   a set of series inside one task (~nb_items_clust1)
#'
#' @param indices Range of series indices to cluster in parallel (initial data)
#' @param getContribs Function to retrieve contributions from initial series indices:
#'   \code{getContribs(indices)} outputs a contributions matrix
#' @inheritParams computeSynchrones
#' @inheritParams claws
#'
#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
#'   Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
#'   outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
NULL

#' @rdname clustering
#' @export
# Iterated stage-1 clustering: repeatedly split the current indices into subsets,
# keep the medoids returned by algoClust1() on each subset, and loop until at most
# K1 indices remain.
#
# indices: series indices to cluster
# getContribs: function(indices) returning the contributions passed to algoClust1()
# K1: target number of medoids (second argument of algoClust1())
# algoClust1: function(contribs, K1) returning positions inside the given subset
# nb_items_clust1: subset size handed to .spreadIndices()
# ncores_clust: number of workers created when parll is TRUE
# verbose: print one progress line per iteration
# parll: run the subsets through a 'parallel' cluster instead of lapply()
#
# Returns the remaining indices (the medoids).
clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust1,
	ncores_clust=1, verbose=FALSE, parll=TRUE)
{
	if (parll)
	{
		cl = parallel::makeCluster(ncores_clust, outfile = "")
		# Release the workers even when a clustering step raises an error
		on.exit(parallel::stopCluster(cl), add=TRUE)
		parallel::clusterExport(cl, varlist=c("getContribs","K1","verbose"), envir=environment())
	}

	# Medoids (expressed as original indices) of one subset of series
	clusterSubset = function(inds)
	{
		if (parll)
			require("epclust", quietly=TRUE) #only needed on the workers
		inds[ algoClust1(getContribs(inds), K1) ]
	}

	while (length(indices) > K1)
	{
		nb_before = length(indices)
		indices_workers = .spreadIndices(indices, nb_items_clust1)
		if (verbose)
			cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
		indices <-
			if (parll)
				unlist( parallel::parLapply(cl, indices_workers, clusterSubset) )
			else
				unlist( lapply(indices_workers, clusterSubset) )
		# Without this guard the loop never ends when an iteration does not shrink
		# the set of indices (presumably nb_items_clust1 too small w.r.t. K1)
		if (length(indices) >= nb_before)
		{
			stop("clusteringTask1: the number of series did not decrease (",
				nb_before, " -> ", length(indices), "); check nb_items_clust1 against K1")
		}
	}

	indices #medoids
}

#' @rdname clustering
#' @export
# Stage-2 task: compute synchrones from the medoids, then the WER distances between
# them, and finally keep the medoids columns selected by algoClust2().
# If there are already at most K2 medoids, they are returned unchanged.
#
# NOTE(review): computeSynchrones() stores one synchrone per column (L x K matrix)
# whereas computeWerDists() reads its input by rows -- confirm the expected layout.
clusteringTask2 = function(medoids, K2, algoClust2, getRefSeries, nb_ref_curves,
	nb_series_per_chunk, sync_mean, nbytes,endian,ncores_clust=1,verbose=FALSE,parll=TRUE)
{
	nb_medoids = ncol(medoids)
	if (verbose)
		cat(paste0("*** Clustering task 2 on ", nb_medoids, " synchrones\n"))

	# Not more medoids than requested clusters: nothing to do
	if (nb_medoids <= K2)
		return (medoids)

	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
		nb_series_per_chunk, sync_mean, ncores_clust, verbose, parll)
	distances = computeWerDists(synchrones, nbytes, endian, ncores_clust, verbose, parll)

	if (verbose)
		cat(paste0("   algoClust2() on ", nrow(distances), " items\n"))

	# Columns of medoids retained by the second clustering algorithm
	selected = algoClust2(distances, K2)
	medoids[ , selected ]
}

#' computeSynchrones
#'
#' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,
#' using L2 distances.
#'
#' @param medoids big.matrix of medoids (curves of same length as initial series)
#' @param getRefSeries Function to retrieve initial series (e.g. in stage 2 after series
#'   have been replaced by stage-1 medoids)
#' @param nb_ref_curves How many reference series? (This number is known at this stage)
#' @inheritParams claws
#'
#' @return A big.matrix of size L x K1 where L = length of a series
#'
#' @export
# Accumulate, for every medoid, the sum (or the mean if sync_mean is TRUE) of the
# reference series assigned to it by computeMedoidsIndices().
#
# medoids: big.matrix with one medoid per column (its @address slot is used)
# getRefSeries: function(indices) returning a matrix with one series per column
# nb_ref_curves: number of reference series; indices 1..nb_ref_curves are processed
# nb_series_per_chunk: chunk size handed to .spreadIndices()
# sync_mean: if TRUE, divide each synchrone by the number of series it received
# ncores_clust, verbose, parll: number of workers, progress messages, parallel switch
#
# Returns a big.matrix with nrow(medoids) rows and one column per medoid; when
# sync_mean is TRUE, columns containing NaN (empty clusters) are removed.
computeSynchrones = function(medoids, getRefSeries, nb_ref_curves,
	nb_series_per_chunk, sync_mean, ncores_clust=1,verbose=FALSE,parll=TRUE)
{
	# Add the series of one chunk to the synchrones they belong to
	computeSynchronesChunk = function(indices)
	{
		if (parll)
		{
			# On a worker: re-attach the shared objects from their descriptors
			require("bigmemory", quietly=TRUE)
			requireNamespace("synchronicity", quietly=TRUE)
			require("epclust", quietly=TRUE)
			synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
			if (sync_mean)
				counts <- bigmemory::attach.big.matrix(counts_desc)
			medoids <- bigmemory::attach.big.matrix(medoids_desc)
			m <- synchronicity::attach.mutex(m_desc)
		}

		ref_series = getRefSeries(indices)
		nb_series = ncol(ref_series)

		# Get medoids indices for this chunk of series
		# (presumably the closest medoid in L2 distance -- see computeMedoidsIndices)
		mi = computeMedoidsIndices(medoids@address, ref_series)

		for (i in seq_len(nb_series))
		{
			# The mutex serializes the read-modify-write on the shared matrices
			if (parll)
				synchronicity::lock(m)
			synchrones[, mi[i] ] = synchrones[, mi[i] ] + ref_series[,i]
			if (sync_mean)
				counts[ mi[i] ] = counts[ mi[i] ] + 1
			if (parll)
				synchronicity::unlock(m)
		}
	}

	K = ncol(medoids) ; L = nrow(medoids)
	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	# TODO: if size > RAM (not our case), use file-backed big.matrix
	synchrones = bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	if (sync_mean)
		counts = bigmemory::big.matrix(nrow=K, ncol=1, type="double", init=0)
	# synchronicity is only for Linux & MacOS; on Windows: run sequentially
	parll = (requireNamespace("synchronicity",quietly=TRUE)
		&& parll && Sys.info()['sysname'] != "Windows")
	if (parll)
	{
		m <- synchronicity::boost.mutex()
		m_desc <- synchronicity::describe(m)
		synchrones_desc = bigmemory::describe(synchrones)
		if (sync_mean)
			counts_desc = bigmemory::describe(counts)
		medoids_desc = bigmemory::describe(medoids)
		cl = parallel::makeCluster(ncores_clust)
		# Release the workers even when a chunk computation raises an error
		on.exit(parallel::stopCluster(cl), add=TRUE)
		varlist=c("synchrones_desc","sync_mean","m_desc","medoids_desc","getRefSeries")
		if (sync_mean)
			varlist = c(varlist, "counts_desc")
		parallel::clusterExport(cl, varlist, envir=environment())
	}

	if (verbose)
		cat(paste("--- Compute ",K," synchrones with ",nb_ref_curves," series\n", sep=""))

	indices_workers = .spreadIndices(seq_len(nb_ref_curves), nb_series_per_chunk)
	# Only the side effects on the shared big.matrix objects matter here
	ignored <-
		if (parll)
			parallel::parLapply(cl, indices_workers, computeSynchronesChunk)
		else
			lapply(indices_workers, computeSynchronesChunk)

	if (!sync_mean)
		return (synchrones)

	#TODO: can we avoid this loop? ( synchrones = sweep(synchrones, 2, counts, '/') )
	for (i in seq_len(K))
		synchrones[,i] = synchrones[,i] / counts[i]
	#NOTE: odds for some clusters to be empty? (when series already come from stage 2)
	#      ...maybe; but let's hope resulting K1' be still quite bigger than K2
	# A column with zero count was divided by 0 and therefore contains NaN
	nonEmpty = vapply(seq_len(K), function(i) all(!is.nan(synchrones[,i])), logical(1))
	if (all(nonEmpty))
		return (synchrones)
	# Else: some clusters are empty, need to slice synchrones
	bigmemory::as.big.matrix(synchrones[,nonEmpty])
}

#' computeWerDists
#'
#' Compute the WER distances between the synchrones curves (in rows), which are
#' returned (e.g.) by \code{computeSynchrones()}
#'
#' @param synchrones A big.matrix of synchrones, in rows. The series have same length
#'   as the series in the initial dataset
#' @inheritParams claws
#'
#' @return A matrix of size K1 x K1
#'
#' @export
# Compute the matrix of pairwise WER distances between the rows of 'synchrones'.
# The continuous wavelet transform (Rwave::cwt) of every row is first computed and
# written with binarize() to a file, then each pair (i,j), i<j, is processed to fill
# a symmetric distance matrix with a zero diagonal.
#
# synchrones: big.matrix, one series per row (n = nrow, delta = ncol)
# nbytes, endian: forwarded to binarize() and getDataInFile()
# ncores_clust: number of workers created when parll is TRUE
# verbose: print progress messages
# parll: run the computations through a 'parallel' cluster instead of lapply()
#
# Returns a regular n x n matrix of distances.
computeWerDists = function(synchrones, nbytes,endian,ncores_clust=1,verbose=FALSE,parll=TRUE)
{
	n <- nrow(synchrones)
	delta <- ncol(synchrones)
	#TODO: automatic tune of all these parameters ? (for other users)
	nvoice   <- 4
	# noctave = 2^13 = 8192 half hours ~ 180 days ; ~log2(ncol(synchrones))
	noctave = 13
	#condition: ( log2(s0*w0/(2*pi)) - 1 ) * nvoice + 1.5 >= 1
	s0 = 2
	w0 = 2*pi
	scaled=FALSE
	s0log = as.integer( (log2( s0*w0/(2*pi) ) - 1) * nvoice + 1.5 )
	totnoct = noctave + as.integer(s0log/nvoice) + 1

	Xwer_dist <- bigmemory::big.matrix(nrow=n, ncol=n, type="double")

	cwt_file = ".epclust_bin/cwt"
	#TODO: args, nb_per_chunk, nbytes, endian

	# Generate n(n-1)/2 pairs (i,j), i<j, for WER distances computations
	pairs =
		if (n >= 2)
		{
			unlist( lapply(seq_len(n-1), function(i)
				lapply((i+1):n, function(j) c(i,j))), recursive=FALSE )
		}
		else
			list()

	# Compute the normalized CWT of row 'index' and write it to cwt_file
	computeSaveCWT = function(index)
	{
		if (parll)
		{
			# On a worker: re-attach the shared matrix from its descriptor
			# (same pattern as in computeDistancesIJ below)
			require("bigmemory", quietly=TRUE)
			require("epclust", quietly=TRUE)
			synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
		}

		series <- scale(ts(synchrones[index,]), center=TRUE, scale=scaled)
		totts.cwt = Rwave::cwt(series, totnoct, nvoice, w0, plot=FALSE)
		ts.cwt = totts.cwt[,s0log:(s0log+noctave*nvoice)]
		#Normalization
		sqs <- sqrt(2^(0:(noctave*nvoice)/nvoice)*s0)
		sqres <- sweep(ts.cwt,2,sqs,'*')
		res <- sqres / max(Mod(sqres))
		#TODO: serialize the CWTs, retrieve them through getDataInFile ;
		#--> OK, just store them as plain series of size delta*ncol (53*17519)
		binarize(c(as.double(Re(res)),as.double(Im(res))), cwt_file, ncol(res), ",", nbytes, endian)
	}

	# Retrieve a CWT previously written by computeSaveCWT().
	# NOTE(review): still a stub -- it returns the raw output of getDataInFile();
	# rebuilding the complex matrix from its real/imaginary halves is not done yet.
	getCWT = function(index)
	{
		#from cwt_file ...
		res <- getDataInFile(c(2*index-1,2*index), cwt_file, nbytes, endian)
	###############TODO:
	}

	# Distance between rows i and j
	computeDistancesIJ = function(pair)
	{
		if (parll)
		{
			require("bigmemory", quietly=TRUE)
			require("epclust", quietly=TRUE)
			synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
			Xwer_dist <- bigmemory::attach.big.matrix(Xwer_dist_desc)
		}

		i = pair[1] ; j = pair[2]
		if (verbose && j==i+1)
			cat(paste("   Distances (",i,",",j,"), (",i,",",j+1,") ...\n", sep=""))
		cwt_i <- getCWT(i)
		cwt_j <- getCWT(j)

		num <- epclustFilter(Mod(cwt_i * Conj(cwt_j)))
		WX  <- epclustFilter(Mod(cwt_i * Conj(cwt_i)))
		WY <- epclustFilter(Mod(cwt_j * Conj(cwt_j)))
		wer2 <- sum(colSums(num)^2) / sum(colSums(WX) * colSums(WY))
		Xwer_dist[i,j] <- sqrt(delta * ncol(cwt_i) * max(1 - wer2, 0.)) #FIXME: wer2 should be < 1
		Xwer_dist[j,i] <- Xwer_dist[i,j]
		Xwer_dist[i,i] = 0.
	}

	if (parll)
	{
		cl = parallel::makeCluster(ncores_clust)
		# Release the workers even when a computation raises an error
		on.exit(parallel::stopCluster(cl), add=TRUE)
		synchrones_desc <- bigmemory::describe(synchrones)
		Xwer_dist_desc <- bigmemory::describe(Xwer_dist)
		# getCWT must already exist at this point, otherwise clusterExport() fails
		parallel::clusterExport(cl, varlist=c("synchrones_desc","Xwer_dist_desc","totnoct",
			"nvoice","w0","s0log","noctave","s0","verbose","getCWT"), envir=environment())
	}

	if (verbose)
		cat(paste("--- Compute WER dists\n", sep=""))
	#precompute and serialize all CWT
	ignored <-
		if (parll)
			parallel::parLapply(cl, seq_len(n), computeSaveCWT)
		else
			lapply(seq_len(n), computeSaveCWT)

	if (verbose)
		cat(paste("--- Compute WER dists\n", sep=""))
	ignored <-
		if (parll)
			parallel::parLapply(cl, pairs, computeDistancesIJ)
		else
			lapply(pairs, computeDistancesIJ)

	# The last diagonal entry is never written by computeDistancesIJ (i < j <= n)
	Xwer_dist[n,n] = 0.
	distances <- Xwer_dist[,]
	rm(Xwer_dist) ; gc()
	distances #~small matrix K1 x K1
}

# Helper function to divide indices into balanced sets
# Split 'indices' into consecutive sets of nb_per_set elements; the elements left
# over after the last full set are appended to the existing sets, so that no set
# is smaller than nb_per_set (unless there are fewer than nb_per_set indices).
# Returns a list of index vectors.
.spreadIndices = function(indices, nb_per_set)
{
	total = length(indices)
	nb_sets = floor( total / nb_per_set )
	leftover = total %% nb_per_set

	# At most one full set: everything goes to a single worker
	if (nb_sets == 0 || (nb_sets == 1 && leftover == 0))
		return (list(indices))

	# Full sets: consecutive slices of length nb_per_set
	sets = lapply( seq_len(nb_sets), function(k) {
		first = nb_per_set * (k-1) + 1
		last = nb_per_set * k
		indices[first:last]
	} )

	# Hand out the trailing elements one by one, from the first leftover to the last
	for (r in rev(seq_len(leftover)))
	{
		target = r %% nb_sets + 1
		sets[[target]] = c(sets[[target]], indices[total - r + 1])
	}
	sets
}
Commit	Line	Data
	1	#' @name clustering
	2	#' @rdname clustering
	3	#' @aliases clusteringTask1 clusteringTask2 computeClusters1 computeClusters2
	4	#'
	5	#' @title Two-stage clustering, withing one task (see \code{claws()})
	6	#'
	7	#' @description \code{clusteringTask1()} runs one full stage-1 task, which consists in
	8	#' iterated stage 1 clustering (on nb_curves / ntasks energy contributions, computed
	9	#' through discrete wavelets coefficients).
	10	#' \code{clusteringTask2()} runs a full stage-2 task, which consists in synchrones
	11	#' and then WER distances computations, before applying the clustering algorithm.
	12	#' \code{computeClusters1()} and \code{computeClusters2()} correspond to the atomic
	13	#' clustering procedures respectively for stage 1 and 2. The former applies the
	14	#' first clustering algorithm on a contributions matrix, while the latter clusters
	15	#' a set of series inside one task (~nb_items_clust1)
	16	#'
	17	#' @param indices Range of series indices to cluster in parallel (initial data)
	18	#' @param getContribs Function to retrieve contributions from initial series indices:
	19	#' \code{getContribs(indices)} outpus a contributions matrix
	20	#' @inheritParams computeSynchrones
	21	#' @inheritParams claws
	22	#'
	23	#' @return For \code{clusteringTask1()}, the indices of the computed (K1) medoids.
	24	#' Indices are irrelevant for stage 2 clustering, thus \code{clusteringTask2()}
	25	#' outputs a big.matrix of medoids (of size LxK2, K2 = final number of clusters)
	26	NULL
	27
	28	#' @rdname clustering
	29	#' @export
	30	clusteringTask1 = function(indices, getContribs, K1, algoClust1, nb_items_clust1,
	31	ncores_clust=1, verbose=FALSE, parll=TRUE)
	32	{
	33	if (parll)
	34	{
	35	cl = parallel::makeCluster(ncores_clust, outfile = "")
	36	parallel::clusterExport(cl, varlist=c("getContribs","K1","verbose"), envir=environment())
	37	}
	38	while (length(indices) > K1)
	39	{
	40	indices_workers = .spreadIndices(indices, nb_items_clust1)
	41	if (verbose)
	42	cat(paste("*** [iterated] Clustering task 1 on ",length(indices)," series\n", sep=""))
	43	indices <-
	44	if (parll)
	45	{
	46	unlist( parallel::parLapply(cl, indices_workers, function(inds) {
	47	require("epclust", quietly=TRUE)
	48	inds[ algoClust1(getContribs(inds), K1) ]
	49	}) )
	50	}
	51	else
	52	{
	53	unlist( lapply(indices_workers, function(inds)
	54	inds[ algoClust1(getContribs(inds), K1) ]
	55	) )
	56	}
	57	}
	58	if (parll)
	59	parallel::stopCluster(cl)
	60
	61	indices #medoids
	62	}
	63
	64	#' @rdname clustering
	65	#' @export
	66	clusteringTask2 = function(medoids, K2, algoClust2, getRefSeries, nb_ref_curves,
	67	nb_series_per_chunk, sync_mean, nbytes,endian,ncores_clust=1,verbose=FALSE,parll=TRUE)
	68	{
	69	if (verbose)
	70	cat(paste("*** Clustering task 2 on ",ncol(medoids)," synchrones\n", sep=""))
	71
	72	if (ncol(medoids) <= K2)
	73	return (medoids)
	74	synchrones = computeSynchrones(medoids, getRefSeries, nb_ref_curves,
	75	nb_series_per_chunk, sync_mean, ncores_clust, verbose, parll)
	76	distances = computeWerDists(synchrones, nbytes, endian, ncores_clust, verbose, parll)
	77	if (verbose)
	78	cat(paste(" algoClust2() on ",nrow(distances)," items\n", sep=""))
	79	medoids[ ,algoClust2(distances,K2) ]
	80	}
	81
	82	#' computeSynchrones
	83	#'
	84	#' Compute the synchrones curves (sum of clusters elements) from a matrix of medoids,
	85	#' using L2 distances.
	86	#'
	87	#' @param medoids big.matrix of medoids (curves of same length as initial series)
	88	#' @param getRefSeries Function to retrieve initial series (e.g. in stage 2 after series
	89	#' have been replaced by stage-1 medoids)
	90	#' @param nb_ref_curves How many reference series? (This number is known at this stage)
	91	#' @inheritParams claws
	92	#'
	93	#' @return A big.matrix of size L x K1 where L = length of a serie
	94	#'
	95	#' @export
	96	computeSynchrones = function(medoids, getRefSeries, nb_ref_curves,
	97	nb_series_per_chunk, sync_mean, ncores_clust=1,verbose=FALSE,parll=TRUE)
	98	{
	99	computeSynchronesChunk = function(indices)
	100	{
	101	if (parll)
	102	{
	103	require("bigmemory", quietly=TRUE)
	104	requireNamespace("synchronicity", quietly=TRUE)
	105	require("epclust", quietly=TRUE)
	106	synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
	107	if (sync_mean)
	108	counts <- bigmemory::attach.big.matrix(counts_desc)
	109	medoids <- bigmemory::attach.big.matrix(medoids_desc)
	110	m <- synchronicity::attach.mutex(m_desc)
	111	}
	112
	113	ref_series = getRefSeries(indices)
	114	nb_series = ncol(ref_series)
	115
	116	# Get medoids indices for this chunk of series
	117	mi = computeMedoidsIndices(medoids@address, ref_series)
	118
	119	for (i in seq_len(nb_series))
	120	{
	121	if (parll)
	122	synchronicity::lock(m)
	123	synchrones[, mi[i] ] = synchrones[, mi[i] ] + ref_series[,i]
	124	if (sync_mean)
	125	counts[ mi[i] ] = counts[ mi[i] ] + 1
	126	if (parll)
	127	synchronicity::unlock(m)
	128	}
	129	}
	130
	131	K = ncol(medoids) ; L = nrow(medoids)
	132	# Use bigmemory (shared==TRUE by default) + synchronicity to fill synchrones in //
	133	# TODO: if size > RAM (not our case), use file-backed big.matrix
	134	synchrones = bigmemory::big.matrix(nrow=L, ncol=K, type="double", init=0.)
	135	if (sync_mean)
	136	counts = bigmemory::big.matrix(nrow=K, ncol=1, type="double", init=0)
	137	# synchronicity is only for Linux & MacOS; on Windows: run sequentially
	138	parll = (requireNamespace("synchronicity",quietly=TRUE)
	139	&& parll && Sys.info()['sysname'] != "Windows")
	140	if (parll)
	141	{
	142	m <- synchronicity::boost.mutex()
	143	m_desc <- synchronicity::describe(m)
	144	synchrones_desc = bigmemory::describe(synchrones)
	145	if (sync_mean)
	146	counts_desc = bigmemory::describe(counts)
	147	medoids_desc = bigmemory::describe(medoids)
	148	cl = parallel::makeCluster(ncores_clust)
	149	varlist=c("synchrones_desc","sync_mean","m_desc","medoids_desc","getRefSeries")
	150	if (sync_mean)
	151	varlist = c(varlist, "counts_desc")
	152	parallel::clusterExport(cl, varlist, envir=environment())
	153	}
	154
	155	if (verbose)
	156	{
	157	if (verbose)
	158	cat(paste("--- Compute ",K," synchrones with ",nb_ref_curves," series\n", sep=""))
	159	}
	160	indices_workers = .spreadIndices(seq_len(nb_ref_curves), nb_series_per_chunk)
	161	ignored <-
	162	if (parll)
	163	parallel::parLapply(cl, indices_workers, computeSynchronesChunk)
	164	else
	165	lapply(indices_workers, computeSynchronesChunk)
	166
	167	if (parll)
	168	parallel::stopCluster(cl)
	169
	170	if (!sync_mean)
	171	return (synchrones)
	172
	173	#TODO: can we avoid this loop? ( synchrones = sweep(synchrones, 2, counts, '/') )
	174	for (i in seq_len(K))
	175	synchrones[,i] = synchrones[,i] / counts[i]
	176	#NOTE: odds for some clusters to be empty? (when series already come from stage 2)
	177	# ...maybe; but let's hope resulting K1' be still quite bigger than K2
	178	noNA_rows = sapply(seq_len(K), function(i) all(!is.nan(synchrones[,i])))
	179	if (all(noNA_rows))
	180	return (synchrones)
	181	# Else: some clusters are empty, need to slice synchrones
	182	bigmemory::as.big.matrix(synchrones[,noNA_rows])
	183	}
	184
	185	#' computeWerDists
	186	#'
	187	#' Compute the WER distances between the synchrones curves (in rows), which are
	188	#' returned (e.g.) by \code{computeSynchrones()}
	189	#'
	190	#' @param synchrones A big.matrix of synchrones, in rows. The series have same length
	191	#' as the series in the initial dataset
	192	#' @inheritParams claws
	193	#'
	194	#' @return A matrix of size K1 x K1
	195	#'
	196	#' @export
	197	computeWerDists = function(synchrones, nbytes,endian,ncores_clust=1,verbose=FALSE,parll=TRUE)
	198	{
	199	n <- nrow(synchrones)
	200	delta <- ncol(synchrones)
	201	#TODO: automatic tune of all these parameters ? (for other users)
	202	nvoice <- 4
	203	# noctave = 2^13 = 8192 half hours ~ 180 days ; ~log2(ncol(synchrones))
	204	noctave = 13
	205	# 4 here represent 2^5 = 32 half-hours ~ 1 day
	206	#NOTE: default scalevector == 2^(0:(noctave * nvoice) / nvoice) * s0 (?)
	207	scalevector <- 2^(4:(noctave * nvoice) / nvoice + 1)
	208	#condition: ( log2(s0w0/(2pi)) - 1 ) * nvoice + 1.5 >= 1
	209	s0 = 2
	210	w0 = 2*pi
	211	scaled=FALSE
	212	s0log = as.integer( (log2( s0w0/(2pi) ) - 1) * nvoice + 1.5 )
	213	totnoct = noctave + as.integer(s0log/nvoice) + 1
	214
	215	Xwer_dist <- bigmemory::big.matrix(nrow=n, ncol=n, type="double")
	216
	217	cwt_file = ".epclust_bin/cwt"
	218	#TODO: args, nb_per_chunk, nbytes, endian
	219
	220	# Generate n(n-1)/2 pairs for WER distances computations
	221	pairs = list()
	222	V = seq_len(n)
	223	for (i in 1:n)
	224	{
	225	V = V[-1]
	226	pairs = c(pairs, lapply(V, function(v) c(i,v)))
	227	}
	228
	229	computeSaveCWT = function(index)
	230	{
	231	ts <- scale(ts(synchrones[index,]), center=TRUE, scale=scaled)
	232	totts.cwt = Rwave::cwt(ts, totnoct, nvoice, w0, plot=FALSE)
	233	ts.cwt = totts.cwt[,s0log:(s0log+noctave*nvoice)]
	234	#Normalization
	235	sqs <- sqrt(2^(0:(noctavenvoice)/nvoice)s0)
	236	sqres <- sweep(ts.cwt,2,sqs,'*')
	237	res <- sqres / max(Mod(sqres))
	238	#TODO: serializer les CWT, les récupérer via getDataInFile ;
	239	#--> OK, faut juste stocker comme séries simples de taille deltancol (5317519)
	240	binarize(c(as.double(Re(res)),as.double(Im(res))), cwt_file, ncol(res), ",", nbytes, endian)
	241	}
	242
	243	if (parll)
	244	{
	245	cl = parallel::makeCluster(ncores_clust)
	246	synchrones_desc <- bigmemory::describe(synchrones)
	247	Xwer_dist_desc <- bigmemory::describe(Xwer_dist)
	248	parallel::clusterExport(cl, varlist=c("synchrones_desc","Xwer_dist_desc","totnoct",
	249	"nvoice","w0","s0log","noctave","s0","verbose","getCWT"), envir=environment())
	250	}
	251
	252	if (verbose)
	253	{
	254	cat(paste("--- Compute WER dists\n", sep=""))
	255	# precompute save all CWT........
	256	}
	257	#precompute and serialize all CWT
	258	ignored <-
	259	if (parll)
	260	parallel::parLapply(cl, 1:n, computeSaveCWT)
	261	else
	262	lapply(1:n, computeSaveCWT)
	263
	264	getCWT = function(index)
	265	{
	266	#from cwt_file ...
	267	res <- getDataInFile(c(2index-1,2index), cwt_file, nbytes, endian)
	268	###############TODO:
	269	}
	270
	271	# Distance between rows i and j
	272	computeDistancesIJ = function(pair)
	273	{
	274	if (parll)
	275	{
	276	require("bigmemory", quietly=TRUE)
	277	require("epclust", quietly=TRUE)
	278	synchrones <- bigmemory::attach.big.matrix(synchrones_desc)
	279	Xwer_dist <- bigmemory::attach.big.matrix(Xwer_dist_desc)
	280	}
	281
	282	i = pair[1] ; j = pair[2]
	283	if (verbose && j==i+1)
	284	cat(paste(" Distances (",i,",",j,"), (",i,",",j+1,") ...\n", sep=""))
	285	cwt_i <- getCWT(i)
	286	cwt_j <- getCWT(j)
	287
	288	num <- epclustFilter(Mod(cwt_i * Conj(cwt_j)))
	289	WX <- epclustFilter(Mod(cwt_i * Conj(cwt_i)))
	290	WY <- epclustFilter(Mod(cwt_j * Conj(cwt_j)))
	291	wer2 <- sum(colSums(num)^2) / sum(colSums(WX) * colSums(WY))
	292	Xwer_dist[i,j] <- sqrt(delta * ncol(cwt_i) * max(1 - wer2, 0.)) #FIXME: wer2 should be < 1
	293	Xwer_dist[j,i] <- Xwer_dist[i,j]
	294	Xwer_dist[i,i] = 0.
	295	}
	296
	297	if (verbose)
	298	{
	299	cat(paste("--- Compute WER dists\n", sep=""))
	300	}
	301	ignored <-
	302	if (parll)
	303	parallel::parLapply(cl, pairs, computeDistancesIJ)
	304	else
	305	lapply(pairs, computeDistancesIJ)
	306
	307	if (parll)
	308	parallel::stopCluster(cl)
	309
	310	Xwer_dist[n,n] = 0.
	311	distances <- Xwer_dist[,]
	312	rm(Xwer_dist) ; gc()
	313	distances #~small matrix K1 x K1
	314	}
	315
	316	# Helper function to divide indices into balanced sets
	317	.spreadIndices = function(indices, nb_per_set)
	318	{
	319	L = length(indices)
	320	nb_workers = floor( L / nb_per_set )
	321	rem = L %% nb_per_set
	322	if (nb_workers == 0 \|\| (nb_workers==1 && rem==0))
	323	{
	324	# L <= nb_per_set, simple case
	325	indices_workers = list(indices)
	326	}
	327	else
	328	{
	329	indices_workers = lapply( seq_len(nb_workers), function(i)
	330	indices[(nb_per_set(i-1)+1):(nb_per_seti)] )
	331	# Spread the remaining load among the workers
	332	rem = L %% nb_per_set
	333	while (rem > 0)
	334	{
	335	index = rem%%nb_workers + 1
	336	indices_workers[[index]] = c(indices_workers[[index]], indices[L-rem+1])
	337	rem = rem - 1
	338	}
	339	}
	340	indices_workers
	341	}