--- /dev/null
+## File : 03_compute-sum-of-classes_2010.r
+## Description : Calculer les synchrones pour chaque groupe obtenu par le
+## clustering.
+
+## Clear the global workspace and work from the project's code directory,
+## so that the relative file names below resolve there.
+rm(list = ls())
+setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
+
+## 1. Read auxiliary data files ####
+
+# Only the first column of each auxiliary file is used.
+identifiants <- read.table("identifs.txt")[[1]]
+all_dates <- read.table("datesall.txt")[[1]]
+# Keep the entries that contain "2010".
+dates <- all_dates[grepl("2010", all_dates)]
+rm(all_dates)
+
+n <- length(identifiants)
+p <- length(dates)
+
+# Number of lines to read from the large data file on each pass.
+blocks <- c(6500, 6500, 6500, 5511)
+
+# Fit of the clustering : clfit
+# NOTE(review): two files are loaded one after the other; if both define
+# `clfit`, the object from the second file is the one used below -- confirm.
+#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
+load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
+load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
+# table(clfit$clustering)
+
+# One row per clustered individual, holding its cluster label.
+dfclust <- data.frame(cluster = clfit$clustering)
+# read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random
+#dfclust <- head(dfclust, 50) # just for testing purposes
+
+# K clusters; `synchros` accumulates one row of p sums per cluster.
+K <- nrow(clfit$clusinfo)
+synchros <- matrix(0, nrow = K, ncol = p)
+rm(clfit)
+
+## 2. Process the large file ####
+
+# Close a connection left open by a previous interactive run, if there is one.
+# An unconditional close(con) fails when `con` does not exist yet or has
+# already been closed, so the cleanup is guarded and best-effort.
+if (exists("con") && inherits(con, "connection")) {
+  try(close(con), silent = TRUE)
+}
+
+con <- file("~/tmp/2010_full.txt") # Establish a connection to the file
+open(con, "r") # Open the connection
+
+for (b in seq_along(blocks)) { # Reading loop: one chunk of lines per pass
+  nb <- blocks[b]
+  actual <- readLines(con = con, n = nb)
+  if (length(actual) == 0L) {
+    break # nothing left to read from the file
+  }
+
+  # Every line is expected to hold p + 1 space-separated fields: a label
+  # followed by p values. Fail loudly instead of letting matrix() recycle
+  # the fields into a misaligned matrix.
+  fields <- unlist(strsplit(actual, " "))
+  if (length(fields) != length(actual) * (p + 1)) {
+    close(con)
+    stop("unexpected number of fields in block ", b, call. = FALSE)
+  }
+  auxmat <- matrix(fields, ncol = p + 1, byrow = TRUE)
+  rm(actual, fields)
+
+  # Convert the value columns to numbers while keeping the matrix shape
+  # (drop = FALSE also covers a block made of a single line).
+  datamat <- auxmat[, -1, drop = FALSE]
+  storage.mode(datamat) <- "double"
+  # Characters 2 to 7 of the first field are used as the row identifier.
+  rownames(datamat) <- substr(auxmat[, 1], 2, 7)
+  rm(auxmat)
+
+  # obtain for each line of datamat the cluster membership (NA if none)
+  clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]
+
+  for (k in seq_len(K)) {
+    clustk <- which(clustfactor == k) # which() ignores the NA (unmatched) rows
+    if (length(clustk) > 0) {
+      # drop = FALSE keeps a one-row selection as a matrix for colSums()
+      synchrosk <- colSums(datamat[clustk, , drop = FALSE])
+      synchros[k, ] <- synchros[k, ] + synchrosk
+      rm(synchrosk)
+    }
+  }
+}
+
+close(con) # close connection to the file
+
+# Transpose so that each cluster becomes a column, and add a `total` column
+# with the sum over all clusters.
+synchros <- data.frame(t(synchros), total = colSums(synchros))
+
+# write.table(synchros, file = "~/tmp/2010_synchros200RC.txt")
+# write.table(synchros, file = "~/tmp/2010_synchros200-random.txt")
+#
+#
+
+
+# Repeat the per-cluster aggregation for the first ten columns of
+# clfit200muchos.txt, writing one output file per column.
+# NOTE(review): `datamat` is whatever the reading loop left behind, i.e. only
+# the last block that was read -- confirm that summing over that block alone
+# is what is wanted here.
+# NOTE(review): `tdata` is not created in the visible part of this script; it
+# presumably comes from one of the loaded .Rdata files -- TODO confirm (the
+# reading loop matches against rownames(dfclust) instead).
+dfclust <- read.table("clfit200muchos.txt")
+
+# The row lookup does not depend on the column, so compute it once.
+row_idx <- match(rownames(datamat), rownames(tdata))
+
+for (pepe in 1:10) {
+  synchros <- matrix(0, ncol = p, nrow = K)
+  clustfactor <- dfclust[row_idx, pepe]
+  for (k in seq_len(K)) {
+    clustk <- which(clustfactor == k) # which() ignores the NA (unmatched) rows
+    if (length(clustk) > 0) {
+      # drop = FALSE keeps a one-row selection as a matrix for colSums()
+      synchrosk <- colSums(datamat[clustk, , drop = FALSE])
+      synchros[k, ] <- synchros[k, ] + synchrosk
+      rm(synchrosk)
+    }
+  }
+
+  # Each cluster becomes a column, plus a `total` column summing all clusters.
+  synchros <- data.frame(t(synchros), total = colSums(synchros))
+  write.table(synchros, file = paste0(colnames(dfclust)[pepe], "2010.txt"))
+}
+