X-Git-Url: https://git.auder.net/?a=blobdiff_plain;ds=sidebyside;f=codes_Jairo_stage2%2F02_cluster_2009.r;fp=codes_Jairo_stage2%2F02_cluster_2009.r;h=0000000000000000000000000000000000000000;hb=48acd629923e19f1957fe34e21fe0530de8a46e0;hp=f08d82ba358d583aa443a9e9a650e7fbc4bf514a;hpb=ad642dc605bbbd0b3fa890c78fa8d634b1d6f703;p=epclust.git

diff --git a/codes_Jairo_stage2/02_cluster_2009.r b/codes_Jairo_stage2/02_cluster_2009.r
deleted file mode 100644
index f08d82b..0000000
--- a/codes_Jairo_stage2/02_cluster_2009.r
+++ /dev/null
@@ -1,86 +0,0 @@
-## File: extract-features.r
-
-rm(list = ls())
-
-## a. Load data & libraries ####
-
-library(cluster)
-MOJARRITA <- Sys.info()[4] ==  "mojarrita"
-
-#source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
-source('01_StBr.r')
-
-if(MOJARRITA){ 
-  setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
-} else {
-  setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
-}
-
-
-matcontrib0 <- read.table(file = "~/tmp/2009_contrib.txt")
-n     <- nrow(matcontrib0)
-
-sdcontrib <- apply(matcontrib0, 1, sd)
-lims      <- quantile(sdcontrib, probs = c(.005, .995)) # obtain 1%-extreme data
-is_normal <- which((sdcontrib > lims[1]) & (sdcontrib < lims[2]))
-
-matcontri_ext <- matcontrib0[-is_normal, ]""
-matcontrib    <- matcontrib0[is_normal, ]     # wipe out aberrant data
-
-matcontrib <- t(apply(matcontrib, 1, function(x) x / sum(x)))
-matcontrib <- t(apply(matcontrib, 1, function(p) log(p / (1 - p)) ))
-
-
-## b. Transform data & compute CI ####
-ci     <- CI(matcontrib)
-tdata  <- ci$tdata; rownames(tdata) <- rownames(matcontrib)
-selvar <- ci$selectv
-  
-## c. Clustering  ##########
-K <- 200
-system.time(
-  clfit <- clara(x        = tdata[, selvar], 
-                 k        = K, 
-                 sampsize = 4000, 
-                 samples  = 4, 
-                 rngR     = TRUE)
-)
-
-#save(clfit, file = 'clfit500.Rdata')
-# save(clfit, file = 'clfit200RC.Rdata')
-save(clfit, file = 'clfit200.Rdata')
-
-rm(ci, matcontrib0, is_normal, lims, selvar)
-gc()
-
-## d. Analyze results  ##########
-
-#1. RÃ©partition du nombre d'observation par cluster
-#plot(sort(table(clfit$clustering), decreasing = TRUE), 
-#     type = 'l', ylab = 'FrÃ©quence', xlab = 'Classe')
-
-
-#clust <- res$clustering
-#  centres <- aggregate(conso, clust)
-#  table(clust)
-  
-  #sel_veille    <- as.Date(rownames(conso)[sel - 1])
-  #sel_lendemain <- as.Date(rownames(conso)[sel + 1])
-  
-  #res_clust <- data.frame(date     = rownames(conso),
-                          #veille    = weekdays(sel_veille),
-                          #lendemain = weekdays(sel_lendemain),
-  #                        clust     = clust)
-  
-  #for(k in 1:K) {
-  #  assign(paste0("dates_clust", K),
-  #         substr(subset(res_clust, clust == k)$date, 1, 7) )
-  #}
-  
-  #dev.off()
-  
-  #save(file = paste0(dtitle, "_clust.Rdata"), 
-       #res_clust, selvar, K, gap)
-#}
-
-#dates_clust1 <- substr(subset(dates, clust == 1)$date, 1, 7)