context("clustering")

#shorthand: map 1->1, 2->2, 3->3, 4->1, ..., 149->2, 150->3, ... (is base==3)
I = function(i, base)
	(i-1) %% base + 1

test_that("computeClusters1&2 behave as expected",
{
	require("MASS", quietly=TRUE)
	if (!require("clue", quietly=TRUE))
		skip("'clue' package not available")

	# 3 gaussian clusters, 300 items; and then 7 gaussian clusters, 490 items
	n = 300
	d = 5
	K = 3
	for (ndK in list( c(300,5,3), c(490,10,7) ))
	{
		n = ndK[1] ; d = ndK[2] ; K = ndK[3]
		cs = n/K #cluster size
		Id = diag(d)
		coefs = do.call(rbind,
			lapply(1:K, function(i) MASS::mvrnorm(cs, c(rep(0,(i-1)),5,rep(0,d-i)), Id)))
		indices_medoids1 = computeClusters1(coefs, K, verbose=TRUE)
		indices_medoids2 = computeClusters2(dist(coefs), K, verbose=TRUE)
		# Get coefs assignments (to medoids)
		assignment1 = sapply(seq_len(n), function(i)
			which.min( rowSums( sweep(coefs[indices_medoids1,],2,coefs[i,],'-')^2 ) ) )
		assignment2 = sapply(seq_len(n), function(i)
			which.min( rowSums( sweep(coefs[indices_medoids2,],2,coefs[i,],'-')^2 ) ) )
		for (i in 1:K)
		{
			expect_equal(sum(assignment1==i), cs, tolerance=5)
			expect_equal(sum(assignment2==i), cs, tolerance=5)
		}

		costs_matrix1 = matrix(nrow=K,ncol=K)
		costs_matrix2 = matrix(nrow=K,ncol=K)
		for (i in 1:K)
		{
			for (j in 1:K)
			{
				# assign i (in result) to j (order 1,2,3)
				costs_matrix1[i,j] = abs( mean(assignment1[((i-1)*cs+1):(i*cs)]) - j )
				costs_matrix2[i,j] = abs( mean(assignment2[((i-1)*cs+1):(i*cs)]) - j )
			}
		}
		permutation1 = as.integer( clue::solve_LSAP(costs_matrix1) )
		permutation2 = as.integer( clue::solve_LSAP(costs_matrix2) )
		for (i in 1:K)
		{
			expect_equal(
				mean(assignment1[((i-1)*cs+1):(i*cs)]), permutation1[i], tolerance=0.05)
			expect_equal(
				mean(assignment2[((i-1)*cs+1):(i*cs)]), permutation2[i], tolerance=0.05)
		}
	}
})

test_that("computeSynchrones behave as expected",
{
	n = 300
	x = seq(0,9.5,0.1)
	L = length(x) #96 1/4h
	K = 3
	s1 = cos(x)
	s2 = sin(x)
	s3 = c( s1[1:(L%/%2)] , s2[(L%/%2+1):L] )
	#sum((s1-s2)^2) == 96
	#sum((s1-s3)^2) == 58
	#sum((s2-s3)^2) == 38
	s = list(s1, s2, s3)
	series = matrix(nrow=n, ncol=L)
	for (i in seq_len(n))
		series[i,] = s[[I(i,K)]] + rnorm(L,sd=0.01)
	getRefSeries = function(indices) {
		indices = indices[indices <= n]
		if (length(indices)>0) series[indices,] else NULL
	}
	synchrones = computeSynchrones(bigmemory::as.big.matrix(rbind(s1,s2,s3)), getRefSeries,
		n, 100, verbose=TRUE, parll=FALSE)

	expect_equal(dim(synchrones), c(K,L))
	for (i in 1:K)
		expect_equal(synchrones[i,], s[[i]], tolerance=0.01)
})

# NOTE: medoids can be a big.matrix
computeDistortion = function(series, medoids)
{
	n = nrow(series) ; L = ncol(series)
	distortion = 0.
	if (bigmemory::is.big.matrix(medoids))
		medoids = medoids[,]
	for (i in seq_len(n))
		distortion = distortion + min( rowSums( sweep(medoids,2,series[i,],'-')^2 ) / L )
	distortion / n
}

test_that("clusteringTask1 behave as expected",
{
	n = 900
	x = seq(0,9.5,0.1)
	L = length(x) #96 1/4h
	K1 = 60
	s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
	series = matrix(nrow=n, ncol=L)
	for (i in seq_len(n))
		series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01)
	getSeries = function(indices) {
		indices = indices[indices <= n]
		if (length(indices)>0) series[indices,] else NULL
	}
	wf = "haar"
	ctype = "absolute"
	getContribs = function(indices) curvesToContribs(series[indices,],wf,ctype)
	indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE)
	medoids_K1 = getSeries(indices1)

	expect_equal(dim(medoids_K1), c(K1,L))
	# Not easy to evaluate result: at least we expect it to be better than random selection of
	# medoids within initial series
	distorGood = computeDistortion(series, medoids_K1)
	for (i in 1:3)
		expect_lte( distorGood, computeDistortion(series,series[sample(1:n, K1),]) )
})

test_that("clusteringTask2 behave as expected",
{
	n = 900
	x = seq(0,9.5,0.1)
	L = length(x) #96 1/4h
	K1 = 60
	K2 = 3
	#for (i in 1:60) {plot(x^(1+i/30)*cos(x+i),type="l",col=i,ylim=c(-50,50)); par(new=TRUE)}
	s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
	series = matrix(nrow=n, ncol=L)
	for (i in seq_len(n))
		series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01)
	getRefSeries = function(indices) {
		indices = indices[indices <= n]
		if (length(indices)>0) series[indices,] else NULL
	}
	# Artificially simulate 60 medoids - perfect situation, all equal to one of the refs
	medoids_K1 = bigmemory::as.big.matrix(
		do.call(rbind, lapply( 1:K1, function(i) s[[I(i,K1)]] ) ) )
	medoids_K2 = clusteringTask2(medoids_K1, K2, getRefSeries, n, 75, verbose=TRUE, parll=FALSE)

	expect_equal(dim(medoids_K2), c(K2,L))
	# Not easy to evaluate result: at least we expect it to be better than random selection of
	# medoids within 1...K1 (among references)
	distorGood = computeDistortion(series, medoids_K2)
	for (i in 1:3)
		expect_lte( distorGood, computeDistortion(series,medoids_K1[sample(1:K1, K2),]) )
})

#NOTE: rather redundant test
#test_that("clusteringTask1 + clusteringTask2 behave as expected",
#{
#	n = 900
#	x = seq(0,9.5,0.1)
#	L = length(x) #96 1/4h
#	K1 = 60
#	K2 = 3
#	s = lapply( seq_len(K1), function(i) x^(1+i/30)*cos(x+i) )
#	series = matrix(nrow=n, ncol=L)
#	for (i in seq_len(n))
#		series[i,] = s[[I(i,K1)]] + rnorm(L,sd=0.01)
#	getSeries = function(indices) {
#		indices = indices[indices <= n]
#		if (length(indices)>0) series[indices,] else NULL
#	}
#	wf = "haar"
#	ctype = "absolute"
#	getContribs = function(indices) curvesToContribs(series[indices,],wf,ctype)
#	require("bigmemory", quietly=TRUE)
#	indices1 = clusteringTask1(1:n, getContribs, K1, 75, verbose=TRUE, parll=FALSE)
#	medoids_K1 = bigmemory::as.big.matrix( getSeries(indices1) )
#	medoids_K2 = clusteringTask2(medoids_K1, K2, getSeries, n, 120, verbose=TRUE, parll=FALSE)
#
#	expect_equal(dim(medoids_K1), c(K1,L))
#	expect_equal(dim(medoids_K2), c(K2,L))
#	# Not easy to evaluate result: at least we expect it to be better than random selection of
#	# medoids within 1...K1 (among references)
#	distorGood = computeDistortion(series, medoids_K2)
#	for (i in 1:3)
#		expect_lte( distorGood, computeDistortion(series,medoids_K1[sample(1:K1, K2),]) )
#})