[epclust.git] / epclust / R / clustering.R

# One clustering pass over the series of a task (work-in-progress sketch).
#
# Starts a local cluster of ncores_clust workers, then repeatedly:
#   1) splits the current indices into chunks of about nb_series_per_chunk,
#   2) runs processChunk() on every chunk in parallel,
#   3) replaces the current indices by the collected result,
# until the result has length K1 (WER=="end") or K2 (WER=="mix").
#
# NOTE(review): the argument list and the clusterExport() arguments are still
# placeholders. ncores_clust, indices_task, i, nb_series_per_chunk, K1, K2 and
# WER are read here but not defined in this function -- presumably they are
# meant to be among the elided parameters; confirm.
oneIteration <- function(..........)
{
	cl_clust <- parallel::makeCluster(ncores_clust)
	# Release the workers on every exit path, including errors raised in a chunk.
	on.exit(parallel::stopCluster(cl_clust), add=TRUE)
	parallel::clusterExport(cl_clust, .............., envir=........)
	indices_clust <- indices_task[[i]]
	repeat
	{
		nb_indices <- length(indices_clust)
		nb_workers <- max( 1, round( nb_indices / nb_series_per_chunk ) )
		# indices_workers[[w]] holds the subset of indices handled by worker w
		indices_workers <- vector("list", nb_workers)
		# 'w' (not 'i') so that the task index read above is not overwritten
		for (w in seq_len(nb_workers))
		{
			first <- nb_series_per_chunk*(w-1) + 1
			# The last worker also takes whatever remains after rounding
			last <- if (w < nb_workers) min(nb_series_per_chunk*w, nb_indices) else nb_indices
			indices_workers[[w]] <- indices_clust[first:last]
		}
		# Run on the cluster created above (the original referenced an undefined 'cl')
		indices_clust <- parallel::parSapply(cl_clust, indices_workers, processChunk,
			K1, K2*(WER=="mix"))
		if ( (WER=="end" && length(indices_clust) == K1) ||
			(WER=="mix" && length(indices_clust) == K2) )
		{
			break
		}
	}
	# NOTE(review): res_clust is never assigned in this function; the value to
	# return still has to be decided (kept as in the original sketch).
	res_clust
}

# Cluster one chunk of series.
#
# indices: identifiers of the series in the chunk, passed as is to getCoeffs().
# K1:      number of clusters for the first clustering (on the coefficients).
# K2:      number of clusters for the optional second clustering on WER
#          distances; this stage runs only when K2 > 0.
#
# Returns the list produced by computeClusters(): from the second clustering
# when K2 > 0, from the first one otherwise.
processChunk <- function(indices, K1, K2)
{
	#1) retrieve data (coeffs)
	coeffs <- getCoeffs(indices)
	#2) cluster, using every column but the first
	#   (presumably the first column is a series identifier -- TODO confirm)
	cl <- computeClusters(as.matrix(coeffs[,2:ncol(coeffs)]), K1)
	#3) WER (optional)
	if (K2 > 0)
	{
		curves <- computeSynchrones(cl)
		# The function defined in this file is computeWerDist (no trailing 's')
		dists <- computeWerDist(curves)
		# NOTE(review): dists is a distance matrix but is handed to pam() as plain
		# data by computeClusters(); check whether dissimilarity mode is intended.
		cl <- computeClusters(dists, K2)
	}
	cl
}

# Partition the input into K clusters around medoids, using cluster::pam().
#
# data: input forwarded unchanged to cluster::pam().
# K:    number of clusters.
#
# Returns a list with three elements taken from the pam() result:
#   clusts  = $clustering (cluster number of each observation),
#   medoids = $medoids,
#   ranks   = $id.med (positions of the medoids in the input).
computeClusters <- function(data, K)
{
	# pam() is called fully qualified below, so the package only has to be
	# installed: check for it without attaching it to the caller's search path.
	if (!requireNamespace("cluster", quietly=TRUE))
		stop("Unable to load cluster library")
	pam_output <- cluster::pam(data, K)
	list( clusts=pam_output$clustering, medoids=pam_output$medoids,
		ranks=pam_output$id.med )
}

#TODO: implement appendCoeffs() in C --> serialize and append to file

# Placeholder, not implemented yet: accepts any arguments, ignores them and
# returns NULL.
computeSynchrones <- function(...)
{
	NULL
}

# Compute the matrix of pairwise WER distances between series.
#
# Input: synchronous curves, either after iterating stage 1, or after each
# stage 1.
#
# conso: matrix with one series per row (n rows, delta columns).
#
# Returns a symmetric n x n numeric matrix with a zero diagonal. With fewer
# than two series there is no pair to compare: an all-zero n x n matrix is
# returned without computing any wavelet transform.
# Stops with an error if the Rwave package is needed and not installed.
computeWerDist <- function(conso)
{
	n <- nrow(conso)
	delta <- ncol(conso)
	# Nothing to compare (also avoids the 1:(n-1) trap when n < 2)
	if (n < 2)
		return (matrix(0., n, n))
	# Rwave::cwt is called fully qualified: check availability, do not attach
	if (!requireNamespace("Rwave", quietly=TRUE))
		stop("Unable to load Rwave library")

	#TODO: automatic tune of all these parameters ? (for other users)
	nvoice <- 4
	# noctave = 2^13 = 8192 half hours ~ 180 days ; ~log2(ncol(conso))
	noctave <- 13
	#condition: ( log2(s0*w0/(2*pi)) - 1 ) * nvoice + 1.5 >= 1
	s0 <- 2
	w0 <- 2*pi
	scaled <- FALSE
	s0log <- as.integer( (log2( s0*w0/(2*pi) ) - 1) * nvoice + 1.5 )
	totnoct <- noctave + as.integer(s0log/nvoice) + 1
	# Normalization weights, one per retained scale: 2^(0:(noctave*nvoice)/nvoice)*s0
	# under a square root. Identical for all series, hence computed once.
	sqs <- sqrt(2^(0:(noctave*nvoice)/nvoice)*s0)

	# (normalized) CWT of each centered series
	Xcwt4 <- lapply(seq_len(n), function(i) {
		ts <- scale(ts(conso[i,]), center=TRUE, scale=scaled)
		totts.cwt <- Rwave::cwt(ts, totnoct, nvoice, w0, plot=FALSE)
		ts.cwt <- totts.cwt[,s0log:(s0log+noctave*nvoice)]
		sqres <- sweep(ts.cwt, MARGIN=2, sqs, '*')
		sqres / max(Mod(sqres))
	})

	#moving average on 3 values (TODO: very slow! correct?!)
	#      'circular=TRUE' is wrong, should just take values on the sides; to rewrite in C
	fcoefs <- rep(1/3, 3)
	# stats:: prefix so that a masking filter() (e.g. from dplyr) cannot be picked up
	smooth <- function(m) stats::filter(m, fcoefs, circular=TRUE)

	# Smoothed power summed over time, per scale. It depends on a single series,
	# so it is computed once per series instead of once per pair.
	power <- lapply(Xcwt4, function(x) colSums(smooth(Mod(x * Conj(x)))))
	nscales <- ncol(Xcwt4[[1]])

	Xwer_dist <- matrix(0., n, n)
	for (i in seq_len(n-1))
	{
		for (j in (i+1):n)
		{
			#TODO: later, compute CWT here (because not enough storage space for 32M series)
			num <- smooth(Mod(Xcwt4[[i]] * Conj(Xcwt4[[j]])))
			wer2 <- sum(colSums(num)^2) / sum(power[[i]] * power[[j]])
			# Clamp at 0: rounding can push wer2 slightly above 1 for (nearly)
			# proportional series, which would make sqrt() return NaN.
			Xwer_dist[i,j] <- sqrt(delta * nscales * max(0, 1 - wer2))
			Xwer_dist[j,i] <- Xwer_dist[i,j]
		}
	}
	# The diagonal is never written and keeps its initial value 0
	Xwer_dist
}
Commit	Line	Data
	1	oneIteration = function(..........)
	2	{
	3	cl_clust = parallel::makeCluster(ncores_clust)
	4	parallel::clusterExport(cl_clust, .............., envir=........)
	5	indices_clust = indices_task[[i]]
	6	repeat
	7	{
	8	nb_workers = max( 1, round( length(indices_clust) / nb_series_per_chunk ) )
	9	indices_workers = list()
	10	#indices[[i]] == (start_index,number_of_elements)
	11	for (i in 1:nb_workers)
	12	{
	13	upper_bound = ifelse( i<nb_workers,
	14	min(nb_series_per_chunk*i,length(indices_clust)), length(indices_clust) )
	15	indices_workers[[i]] = indices_clust[(nb_series_per_chunk*(i-1)+1):upper_bound]
	16	}
	17	indices_clust = parallel::parSapply(cl, indices_workers, processChunk, K1, K2*(WER=="mix"))
	18	if ( (WER=="end" && length(indices_clust) == K1) ||
	19	(WER=="mix" && length(indices_clust) == K2) )
	20	{
	21	break
	22	}
	23	}
	24	parallel::stopCluster(cl_clust)
	25	res_clust
	26	}
	27
	28	processChunk = function(indices, K1, K2)
	29	{
	30	#1) retrieve data (coeffs)
	31	coeffs = getCoeffs(indices)
	32	#2) cluster
	33	cl = computeClusters(as.matrix(coeffs[,2:ncol(coeffs)]), K1)
	34	#3) WER (optional)
	35	if (K2 > 0)
	36	{
	37	curves = computeSynchrones(cl)
	38	dists = computeWerDists(curves)
	39	cl = computeClusters(dists, K2)
	40	}
	41	cl
	42	}
	43
	44	computeClusters = function(data, K)
	45	{
	46	library(cluster)
	47	pam_output = cluster::pam(data, K)
	48	return ( list( clusts=pam_output$clustering, medoids=pam_output$medoids,
	49	ranks=pam_output$id.med ) )
	50	}
	51
	52	#TODO: appendCoeffs() en C --> serialize et append to file
	53
	54	computeSynchrones = function(...)
	55	{
	56
	57	}
	58
	59	#Entrée : courbes synchrones, soit après étape 1 itérée, soit après chaqure étape 1
	60	computeWerDist = function(conso)
	61	{
	62	if (!require("Rwave", quietly=TRUE))
	63	stop("Unable to load Rwave library")
	64	n <- nrow(conso)
	65	delta <- ncol(conso)
	66	#TODO: automatic tune of all these parameters ? (for other users)
	67	nvoice <- 4
	68	# noctave = 2^13 = 8192 half hours ~ 180 days ; ~log2(ncol(conso))
	69	noctave = 13
	70	# 4 here represent 2^5 = 32 half-hours ~ 1 day
	71	#NOTE: default scalevector == 2^(0:(noctave * nvoice) / nvoice) * s0 (?)
	72	scalevector <- 2^(4:(noctave * nvoice) / nvoice) * 2
	73	#condition: ( log2(s0*w0/(2*pi)) - 1 ) * nvoice + 1.5 >= 1
	74	s0=2
	75	w0=2*pi
	76	scaled=FALSE
	77	s0log = as.integer( (log2( s0*w0/(2*pi) ) - 1) * nvoice + 1.5 )
	78	totnoct = noctave + as.integer(s0log/nvoice) + 1
	79
	80	# (normalized) observations node with CWT
	81	Xcwt4 <- lapply(seq_len(n), function(i) {
	82	ts <- scale(ts(conso[i,]), center=TRUE, scale=scaled)
	83	totts.cwt = Rwave::cwt(ts,totnoct,nvoice,w0,plot=0)
	84	ts.cwt = totts.cwt[,s0log:(s0log+noctave*nvoice)]
	85	#Normalization
	86	sqs <- sqrt(2^(0:(noctave*nvoice)/nvoice)*s0)
	87	sqres <- sweep(ts.cwt,MARGIN=2,sqs,'*')
	88	sqres / max(Mod(sqres))
	89	})
	90
	91	Xwer_dist <- matrix(0., n, n)
	92	fcoefs = rep(1/3, 3) #moving average on 3 values (TODO: very slow! correct?!)
	93	for (i in 1:(n-1))
	94	{
	95	for (j in (i+1):n)
	96	{
	97	#TODO: later, compute CWT here (because not enough storage space for 32M series)
	98	# 'circular=TRUE' is wrong, should just take values on the sides; to rewrite in C
	99	num <- filter(Mod(Xcwt4[[i]] * Conj(Xcwt4[[j]])), fcoefs, circular=TRUE)
	100	WX <- filter(Mod(Xcwt4[[i]] * Conj(Xcwt4[[i]])), fcoefs, circular=TRUE)
	101	WY <- filter(Mod(Xcwt4[[j]] * Conj(Xcwt4[[j]])), fcoefs, circular=TRUE)
	102	wer2 <- sum(colSums(num)^2) / sum( sum(colSums(WX) * colSums(WY)) )
	103	Xwer_dist[i,j] <- sqrt(delta * ncol(Xcwt4[[1]]) * (1 - wer2))
	104	Xwer_dist[j,i] <- Xwer_dist[i,j]
	105	}
	106	}
	107	diag(Xwer_dist) <- numeric(n)
	108	Xwer_dist
	109	}