## File : 05_cluster2step.r ## Description : rm(list = ls()) setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") source("~/Documents/projects/2014_EDF-Lyon2/R/01_StBr.r") # aux functs source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r') library(cluster) ## 1. Read auxiliar data files #### identifiants <- read.table("identifs.txt")[ ,1] dates0 <- read.table("datesall.txt")[, 1] dates <- as.character(dates0[grep("2009", dates0)]) rm(dates0) n <- length(identifiants) p <- length(dates) synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200.txt")) #synchros09 <- as.matrix(read.table("~/tmp/2009_synchros200RC.txt")) nas <- which(is.na(synchros09)[1, ]) # some 1/1/2009 are missing synchros09[nas, 1] <- colMeans(synchros09[2:4, nas]) #synchros09 <- t(synchros09) # series on rows, use if necessary ## 2. Compute contributions #### auxDWT <- t(apply(synchros09, 1, toDWT)) matcontrib <- t(apply(auxDWT, 1, contrib, rel = TRUE, logit = TRUE)) rm(auxDWT) ## 3. Transform data & compute CI #### ci <- CI(matcontrib[-201, ]) # last row has the total tdata <- ci$tdata; rownames(tdata) <- rownames(matcontrib)[-201] hc <- hclust(dist(tdata[, ci$selectv]), method = "ward.D") plot(hc) #clust <- cutree(hc, 2) for(K in 2:30){ #K <- 3 pamfit <- pam(tdata[-201, ci$selectv], k = K, stand = FALSE) #table(pamfit$clustering) SC <- matrix(0, ncol = p, nrow = K) clustfactor <- pamfit$clustering # for(k in 1:K){ # clustk <- which(clustfactor == k) # if(length(clustk) > 0) { # if(length(clustk) > 1) { # SCk <- colSums(synchros09[which(clustfactor == k), ]) # } else { # SCk <- synchros09[which(clustfactor == k), ] # } # SC[k, ] <- SC[k, ] + SCk # rm(SCk) # } #} write.table(clustfactor, file = paste0("~/tmp/clustfactorRC", K, ".txt")) #write.table(clustfactor, file = "~/tmp/clustfactor3.txt") } # Plots layout(1) matplot(t(SC)[48*10 + 1:(48*30), ], type = 'l', ylab = '',col = 1:3, lty = 1) matplot(t(SC)[48*100 + 1:(48*30), ], type = 'l', ylab = '', col = 1:3, lty = 1)