X-Git-Url: https://git.auder.net/images/diag_mark.svg?a=blobdiff_plain;f=old_C_code%2Fstage2_UNFINISHED%2Fsrc%2Funused%2F02_cluster-par_2009.r;fp=old_C_code%2Fstage2_UNFINISHED%2Fsrc%2Funused%2F02_cluster-par_2009.r;h=0000000000000000000000000000000000000000;hb=62deb4244895a20a35397dfb062f0b9fe94c5012;hp=0f138c9dffc3665972826006525c8a7bc17a052a;hpb=3eef8d3df59ded9a281cff51f79fe824198a7427;p=epclust.git diff --git a/old_C_code/stage2_UNFINISHED/src/unused/02_cluster-par_2009.r b/old_C_code/stage2_UNFINISHED/src/unused/02_cluster-par_2009.r deleted file mode 100644 index 0f138c9..0000000 --- a/old_C_code/stage2_UNFINISHED/src/unused/02_cluster-par_2009.r +++ /dev/null @@ -1,106 +0,0 @@ -## File: extract-features.r - -rm(list = ls()) - -## a. Load data & libraries #### - -#library(cluster) -#library(snow) -library(foreach) -library(doParallel) - -MOJARRITA <- Sys.info()[4] == "mojarrita" - -if(MOJARRITA){ - setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") -} else { - setwd("~/2014_EDF-Orsay-Lyon2/codes/") -} - -#source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r') -source('01_StBr.r') - -matcontrib0 <- read.table(file = "~/tmp/2009_contrib.txt") -n <- nrow(matcontrib0) - -sdcontrib <- apply(matcontrib0, 1, sd) -lims <- quantile(sdcontrib, probs = c(.005, .995)) # obtain 1%-extreme data -is_normal <- which((sdcontrib > lims[1]) & (sdcontrib < lims[2])) - -matcontri_ext <- matcontrib0[-is_normal, ] -matcontrib <- matcontrib0[is_normal, ] # wipe out aberrant data - -matcontrib <- t(apply(matcontrib, 1, function(x) x / sum(x))) -matcontrib <- t(apply(matcontrib, 1, function(p) log(p / (1 - p)) )) - - -## b. Transform data & compute CI #### -ci <- CI(matcontrib) -tdata <- ci$tdata; rownames(tdata) <- rownames(matcontrib) -selvar <- ci$selectv - -## c. Clustering ########## - -#number of iterations -iters <- 20 - -#setup parallel backend to use 8 processors -cl <- makeCluster(20) -registerDoParallel(cl) - -clfitlist <- foreach(icount(iters)) %dopar% { - library(cluster) - K <- 200 - clara(x = tdata[, selvar], - k = K, - sampsize = 4000, - samples = 4, - rngR = TRUE) -} - -stopCluster(cl) - -#save(clfit, file = 'clfit500.Rdata') -# save(clfit, file = 'clfit200RC.Rdata') -#save(clfitlist, file = 'clfitlist200.Rdata') -#rm(ci, matcontrib0, is_normal, lims, selvar) -#gc() - - -res <- lapply(clfitlist, function(x) x$clustering) -names(res) <- 1:iters - -save(data.frame(res), file = 'res/clfitdf200.Rdata') - - -## d. Analyze results ########## - -#1. Répartition du nombre d'observation par cluster -#plot(sort(table(clfit$clustering), decreasing = TRUE), -# type = 'l', ylab = 'Fréquence', xlab = 'Classe') - - -#clust <- res$clustering -# centres <- aggregate(conso, clust) -# table(clust) - - #sel_veille <- as.Date(rownames(conso)[sel - 1]) - #sel_lendemain <- as.Date(rownames(conso)[sel + 1]) - - #res_clust <- data.frame(date = rownames(conso), - #veille = weekdays(sel_veille), - #lendemain = weekdays(sel_lendemain), - # clust = clust) - - #for(k in 1:K) { - # assign(paste0("dates_clust", K), - # substr(subset(res_clust, clust == k)$date, 1, 7) ) - #} - - #dev.off() - - #save(file = paste0(dtitle, "_clust.Rdata"), - #res_clust, selvar, K, gap) -#} - -#dates_clust1 <- substr(subset(dates, clust == 1)$date, 1, 7)