## File : 03_compute-sum-of-classes_2010.r
## Description : Compute the synchrones (per-date sums of the member series)
##               for each group obtained by the clustering.

# Start from an empty global workspace.
# NOTE(review): this also discards any objects defined before the script is
# sourced.
rm(list = ls())

# The relative file names passed to read.table() in this script are resolved
# against this directory.
setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

# Only the first column of each auxiliary file is kept.
identifiants <- read.table("identifs.txt")[ ,1]
dates0       <- read.table("datesall.txt")[, 1]
# Keep the entries of dates0 whose text matches "2010".
dates        <- dates0[grep("2010", dates0)]
rm(dates0)

# n is not referenced again in the visible part of this script.
n <- length(identifiants)
# p is the number of value columns: each line of the large file is split into
# p + 1 fields (one identifier field followed by p values).
p <- length(dates)

# Number of lines read from the large file on each pass of the reading loop
# (3 * 6500 + 5511 = 25011 lines in total).
blocks <- c(rep(6500, 3), 5511)  

# Fit of the clustering : clfit
# Both files are loaded in sequence; any object stored under the same name in
# both files ends up with the value from the second one.
# NOTE(review): presumably each file stores an object called `clfit` -- confirm.
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
#  table(clfit$clustering)

# One-column data frame of cluster labels. Its row names are matched against
# the identifiers of the large file further down.
# NOTE(review): presumably the row names come from names(clfit$clustering) --
# verify that the clustering vector is named.
dfclust <- data.frame(cluster = clfit$clustering)
# read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random

# Number of clusters, taken as the number of rows of clfit$clusinfo.
K       <- nrow(clfit$clusinfo)
#dfclust <- head(dfclust, 50)           # just for testing purposes
# Accumulator: one row per cluster, one column per date, initialised to zero.
synchros <- matrix(0, ncol = p, nrow = K)
rm(clfit)
                   
## 2. Process the large file ####

# Stream the large file block by block and add every series to the running
# sum of the cluster it belongs to.
#
# Reads  : blocks (lines per pass), p (values per line), K (number of
#          clusters), dfclust (cluster labels, matched through its row names).
# Updates: synchros (K x p accumulator), then reshapes it into a data frame
#          with one column per cluster plus a `total` column.
# Leaves : datamat and clustfactor of the last block in the workspace.
#
# No close(con) is issued before the connection is created: the workspace was
# emptied at the top of the script, so `con` cannot exist at this point and
# closing it would stop the script with "object 'con' not found".
con <- file("~/tmp/2010_full.txt")  # Establish a connection to the file
open(con, "r")                      # Open the connection

# The connection is closed in `finally`, so it is released even when a block
# fails to parse.
tryCatch({
  for (b in seq_along(blocks)) {      # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    # One row per line read: first field is the identifier, then p values.
    auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
    rm(actual)

    # Convert the value fields to numeric in one go. drop = FALSE keeps the
    # matrix shape when a block holds a single line.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat))
    # Characters 2 to 7 of the first field are used as the row identifier.
    # NOTE(review): presumably this strips a leading quote -- confirm against
    # the file format.
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # obtain for each line of datamat the cluster membership (if any);
    # lines whose identifier is absent from dfclust get NA and are skipped
    # by which() below.
    clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]

    for (k in seq_len(K)) {
      clustk <- which(clustfactor == k)
      if (length(clustk) > 0) {
        # drop = FALSE makes colSums() valid for one member as well as many.
        synchros[k, ] <- synchros[k, ] +
          colSums(datamat[clustk, , drop = FALSE])
      }
    }
  }
}, finally = close(con))            # close connection to the file

# One row per date, one column per cluster, plus the sum over all clusters.
synchros <- data.frame(t(synchros), total = colSums(synchros))

# write.table(synchros, file = "~/tmp/2010_synchros200RC.txt")
# write.table(synchros, file = "~/tmp/2010_synchros200-random.txt")
# 
# 


# Repeat the per-cluster aggregation for ten alternative partitions, one per
# column of clfit200muchos.txt, and write one output file per partition
# (named after the column, with the suffix "2010.txt").
# NOTE(review): `datamat` is whatever the reading loop left in the workspace,
# i.e. only the last block read from the large file -- confirm that summing
# over that block alone is intended.
# NOTE(review): `tdata` is not defined in the visible part of this script;
# presumably it is restored by one of the load() calls -- verify.
dfclust <- read.table("clfit200muchos.txt")

for (pepe in 1:10) {
  # Fresh K x p accumulator for this partition.
  synchros <- matrix(0, ncol = p, nrow = K)
  # Cluster label of every row of datamat under partition `pepe`.
  clustfactor <- dfclust[match(rownames(datamat), rownames(tdata)), pepe]

  for (k in 1:K) {
    clustk <- which(clustfactor == k)
    if (length(clustk) == 0) next

    # A single member is already a plain vector; several members are summed
    # column by column.
    synchrosk <- if (length(clustk) > 1) {
      colSums(datamat[clustk, ])
    } else {
      datamat[clustk, ]
    }
    synchros[k, ] <- synchros[k, ] + synchrosk
    rm(synchrosk)
  }

  # One row per date, one column per cluster, plus the sum over all clusters.
  synchros <- data.frame(t(synchros), total = colSums(synchros))
  write.table(synchros, file = paste0(colnames(dfclust)[pepe], "2010.txt"))
}