## File : 03_compute-sum-of-classes_2010.r
## Description : Compute the synchrones (per-cluster sums of the individual
##               series) for each group obtained by the clustering.

# The script no longer wipes the workspace (`rm(list = ls())`) nor changes the
# working directory (`setwd()`); files that used to be resolved relative to the
# working directory are now addressed explicitly through `code_dir`.
code_dir <- "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes"

## 0. Helpers ####

# Read the large data file block by block and accumulate, for every partition
# (column) of `memberships`, the sum of the series belonging to each cluster.
#
# Arguments:
#   path        path of the text file; each line holds p + 1 space-separated
#               fields, the first one carrying the series identifier in
#               characters 2 to 7 and the remaining p being numeric values.
#   blocks      number of lines to read at each step.
#   p           number of numeric values per line.
#   memberships data.frame whose row names are series identifiers and whose
#               columns are cluster labels (one column per partition).
#   K           number of clusters; only labels in 1..K are accumulated.
#
# Value: a list with one K x p matrix per column of `memberships`.
sum_series_by_cluster <- function(path, blocks, p, memberships, K) {
  n_part <- ncol(memberships)
  sums <- replicate(n_part, matrix(0, nrow = K, ncol = p), simplify = FALSE)

  con <- file(path, open = "r")
  # Close the connection even if parsing a block fails.
  on.exit(close(con), add = TRUE)

  for (nb in blocks) {
    lines <- readLines(con = con, n = nb)
    if (length(lines) == 0L) {
      break  # file shorter than announced by `blocks`
    }
    fields <- matrix(unlist(strsplit(lines, " ")), ncol = p + 1, byrow = TRUE)
    rm(lines)

    # as.numeric() flattens column-wise and matrix() refills column-wise, so
    # the layout is preserved; this also works for a one-row block.
    datamat <- matrix(as.numeric(fields[, -1]), nrow = nrow(fields))
    ids <- substr(fields[, 1], 2, 7)
    rm(fields)

    # Row of `memberships` for each series of the block (NA if not clustered).
    rows <- match(ids, rownames(memberships))

    for (j in seq_len(n_part)) {
      labels <- memberships[rows, j]
      # %in% is never NA, so unmatched series are dropped here as well.
      keep <- labels %in% seq_len(K)
      if (!any(keep)) {
        next
      }
      block_sums <- rowsum(datamat[keep, , drop = FALSE],
                           group = labels[keep])
      # rowsum() names its rows after the group labels.
      idx <- as.integer(rownames(block_sums))
      sums[[j]][idx, ] <- sums[[j]][idx, ] + block_sums
    }
  }

  sums
}

# Put time points in rows and clusters in columns, and append the overall
# total of all clusters as a last column.
as_synchros_table <- function(sums) {
  data.frame(t(sums), total = colSums(sums))
}

## 1. Read auxiliary data files ####

identifiants <- read.table(file.path(code_dir, "identifs.txt"))[, 1]
dates0 <- read.table(file.path(code_dir, "datesall.txt"))[, 1]
dates <- dates0[grep("2010", dates0)]
rm(dates0)

n <- length(identifiants)
p <- length(dates)

# Number of lines read from the large file at each step.
blocks <- c(rep(6500, 3), 5511)

# Fit of the clustering : clfit
# NOTE(review): both files are loaded in sequence; presumably each defines
# `clfit`, in which case only the second one is actually used -- confirm.
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')

# table(clfit$clustering)
dfclust <- data.frame(cluster = clfit$clustering)
# NOTE(review): original note here read
#   'read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random'
# presumably the random partition is stored in that file -- confirm.

K <- nrow(clfit$clusinfo)

#dfclust <- head(dfclust, 50) # just for testing purposes
rm(clfit)

## 2. Process the large file ####

data_path <- "~/tmp/2010_full.txt"

synchros <- sum_series_by_cluster(data_path, blocks, p, dfclust, K)[[1]]
synchros <- as_synchros_table(synchros)

# write.table(synchros, file = "~/tmp/2010_synchros200RC.txt")
# write.table(synchros, file = "~/tmp/2010_synchros200-random.txt")

## 3. Same computation for a set of alternative partitions ####

# NOTE(review): the row names of this table are assumed to be the series
# identifiers (the original code matched against an undefined `tdata`).
dfclust <- read.table(file.path(code_dir, "clfit200muchos.txt"))

n_partitions <- 10L
stopifnot(ncol(dfclust) >= n_partitions)

# The file is read again so that every block contributes to the sums, not
# only the last block left over from the previous pass.
all_sums <- sum_series_by_cluster(
  data_path, blocks, p,
  dfclust[, seq_len(n_partitions), drop = FALSE], K
)

for (pepe in seq_len(n_partitions)) {
  synchros <- as_synchros_table(all_sums[[pepe]])
  write.table(
    synchros,
    file = file.path(code_dir, paste0(colnames(dfclust)[pepe], "2010.txt"))
  )
}