+## File : 03_compute-sum-of-classes_2009.r
+## Description : Calculer les synchrones pour chaque groupe obtenu par le
+## clustering.
+
+# Start from an empty workspace.
+rm(list = ls())
+
+# TRUE when the 4th field of Sys.info() (the node name, per the R
+# documentation) is "mojarrita"; selects machine-specific settings below.
+MOJARRITA <- Sys.info()[4] == "mojarrita"
+
+# The project's code directory lives in a different place on each machine.
+setwd(
+  if (MOJARRITA) {
+    "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
+  } else {
+    "~/2014_EDF-Orsay-Lyon2/codes/"
+  }
+)
+
+
+## 1. Read auxiliary data files ####
+
+# Identifiers: first column of identifs.txt.
+identifiants <- read.table("identifs.txt")[, 1]
+n <- length(identifiants)
+
+# Dates: entries of the first column of datesall.txt that contain "2009".
+dates <- local({
+  all_dates <- read.table("datesall.txt")[, 1]
+  all_dates[grepl("2009", all_dates)]
+})
+p <- length(dates)
+
+# Number of lines read from the large file at each pass of the reading loop:
+# four chunks on "mojarrita", a single chunk of 25011 lines elsewhere.
+blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011
+
+# Clustering fit: this .Rdata file is expected to define `res`, which holds
+# the clustering memberships.
+load("../res/clfitdf200.Rdata")
+res <- as.data.frame(res)
+
+# Randomised memberships: every column of `res` is permuted independently.
+resRDN <- as.data.frame(
+  lapply(res, function(memberships) sample(memberships))
+)
+# save(file = "../res/clfitdf200RDN.Rdata", resRDN)
+# NOTE(review): this file presumably stores a previously saved `resRDN`, in
+# which case it replaces the fresh permutation computed just above -- confirm.
+load("../res/clfitdf200RDN.Rdata")
+
+K <- 200 # nrow(clfit$clusinfo)
+lres <- length(res)
+rm(res)
+
+## 2. Process the large file ####
+
+# Sum the rows of `block_mat` cluster by cluster.
+#   block_mat  : numeric matrix, one row per individual of the current block.
+#   membership : cluster label of each row of `block_mat`, in the same order.
+#   K          : number of clusters; labels are taken to be 1..K.
+# Returns a K x ncol(block_mat) matrix whose k-th row is the column-wise sum
+# of the rows labelled k (all zeros when no row carries that label).
+# NA values are not removed, so they propagate into the sums.
+sum_by_cluster <- function(block_mat, membership, K) {
+  sums <- matrix(0, nrow = K, ncol = ncol(block_mat))
+  for (k in seq_len(K)) {
+    members <- which(membership == k)
+    # drop = FALSE keeps a matrix for 0 or 1 member, so colSums() also
+    # handles empty and singleton clusters.
+    sums[k, ] <- colSums(block_mat[members, , drop = FALSE])
+  }
+  sums
+}
+
+# One K x p accumulator per clustering in resRDN; the sums of every block
+# are added to it so the result covers the whole file, not just the last
+# block read.
+synchros <- lapply(resRDN, function(ll) matrix(0, nrow = K, ncol = p))
+offset <- 0 # lines of the file already consumed by earlier blocks
+
+con <- file("~/tmp/2009_full.txt", open = "r") # Connection to the large file
+tryCatch({
+  for (b in seq_along(blocks)) { # Reading loop
+    actual <- readLines(con = con, n = blocks[b])
+    nread <- length(actual)
+    if (nread == 0) {
+      break # file exhausted before all the declared blocks were read
+    }
+    actual_split <- strsplit(actual, " ")
+    rm(actual)
+
+    # The first field of a line is the identifier, the others are the values;
+    # fields that are not numbers become NA through as.numeric().
+    datamat <- matrix(
+      unlist(lapply(actual_split, function(x) as.numeric(x[-1]))),
+      ncol = p, byrow = TRUE
+    )
+    rownames(datamat) <- substr(
+      vapply(actual_split, function(x) x[1], character(1)), 2, 7
+    )
+    rm(actual_split)
+
+    # Memberships are matched to the file by position, so the rows of this
+    # block correspond to entries offset + 1, ..., offset + nread.
+    # NOTE(review): assumes the file lists individuals in the same order as
+    # the membership vectors -- confirm.
+    rows <- offset + seq_len(nread)
+    synchros <- Map(
+      function(total, ll) total + sum_by_cluster(datamat, ll[rows], K),
+      synchros, resRDN
+    )
+    offset <- offset + nread
+  }
+}, finally = close(con)) # close the connection even if a block fails
+
+# save(synchros, file = "~/tmp/2009synchrosdf200RND")
+