## File        : 03_compute-sum-of-classes_2009.r
## Description : Compute the synchrones (per-cluster column sums of the
##               individual curves) for each group obtained by the
##               clustering.

# NOTE(review): rm(list = ls()) and setwd() are kept because the relative
# paths below depend on the working directory; consider replacing them with
# explicit file.path() calls in a follow-up.
rm(list = ls())

# Machine-dependent configuration, keyed on the host name.
MOJARRITA <- Sys.info()[["nodename"]] == "mojarrita"
if (MOJARRITA) {
  setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
} else {
  setwd("~/2014_EDF-Orsay-Lyon2/codes/")
}

## 1. Read auxiliary data files ####

identifiants <- read.table("identifs.txt")[, 1]
dates0 <- read.table("datesall.txt")[, 1]
dates <- dates0[grep("2009", dates0)]  # keep only the 2009 time stamps
rm(dates0)

n <- length(identifiants)
p <- length(dates)

# Number of lines of the large file read per iteration: several chunks on
# "mojarrita", a single chunk elsewhere.
if (MOJARRITA) {
  blocks <- c(rep(6500, 3), 5511)
} else {
  blocks <- 25011
}

# Fit of the clustering: loads `res`, which contains the clustering
# memberships.
load("../res/clfitdf200.Rdata")
res <- as.data.frame(res)

# Randomised memberships (each column of `res` permuted).
# NOTE(review): the load() below presumably overwrites this object with the
# version saved on disk (see the commented save()) -- confirm and drop one.
resRDN <- as.data.frame(lapply(res, sample))
# save(file = "../res/clfitdf200RDN.Rdata", resRDN)
load("../res/clfitdf200RDN.Rdata")

lres <- length(res)
K <- 200  # nrow(clfit$clusinfo)
rm(res)

## 2. Process the large file ####

# One K x p accumulator per clustering; block contributions are added to it
# so that the result does not depend on how the file is chunked.
synchros <- lapply(resRDN, function(ll) matrix(0, nrow = K, ncol = p))

# Number of data rows consumed so far. Row i of the current block is matched
# to entry offset + i of each membership vector (the original single-block
# code matched datamat rows to membership entries by position).
offset <- 0L

con <- file("~/tmp/2009_full.txt", open = "r")  # open a read connection

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    actual <- readLines(con = con, n = blocks[b])
    if (length(actual) == 0L) {
      break  # file exhausted before all blocks were read
    }

    actual_split <- strsplit(actual, " ")
    rm(actual)

    # The first field of each line carries the identifier; the remaining
    # fields are the values.
    auxnames <- vapply(actual_split, `[`, character(1), 1L)

    # The NA introduced by as.numeric are "NA" strings.
    auxlist <- lapply(actual_split, function(x) as.numeric(x[-1]))
    rm(actual_split)
    datamat <- matrix(unlist(auxlist), ncol = p, byrow = TRUE)
    rm(auxlist)
    rownames(datamat) <- substr(auxnames, 2, 7)

    rows <- offset + seq_len(nrow(datamat))

    for (j in seq_along(resRDN)) {
      memb <- resRDN[[j]][rows]  # memberships of the rows in this block
      for (k in seq_len(K)) {
        clustk <- which(memb == k)
        # A cluster may have no member in this block; skip it. drop = FALSE
        # keeps a matrix so colSums() also works for a single row.
        if (length(clustk) > 0L) {
          synchros[[j]][k, ] <- synchros[[j]][k, ] +
            colSums(datamat[clustk, , drop = FALSE])
        }
      }
    }

    offset <- offset + nrow(datamat)
    rm(datamat)
  }
}, finally = close(con))  # close connection to the file, even on error

if (length(resRDN) > 0L && offset != length(resRDN[[1]])) {
  warning("Read ", offset, " rows but the membership vectors have ",
          length(resRDN[[1]]), " entries.", call. = FALSE)
}

# save(synchros, file = "~/tmp/2009synchrosdf200RND")