+++ /dev/null
-## File : 03_compute-sum-of-classes_2009.r
-## Description : Calculer les synchrones pour chaque groupe obtenu par le
-## clustering.
-
-# Remove every object from the current environment before starting.
-rm(list = ls())
-
-# TRUE when the node name reported by Sys.info() is "mojarrita".
-MOJARRITA <- Sys.info()["nodename"] == "mojarrita"
-
-# The working directory depends on which machine the script runs on.
-setwd(
-  if (MOJARRITA) {
-    "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
-  } else {
-    "~/2014_EDF-Orsay-Lyon2/codes/"
-  }
-)
-
-
-## 1. Read auxiliary data files ####
-
-# Only the first column of each lookup file is used.
-identifiants <- read.table("identifs.txt")[[1]]
-dates0 <- read.table("datesall.txt")[[1]]
-
-# Keep the dates whose text contains "2009".
-dates <- dates0[grepl("2009", dates0)]
-rm(dates0)
-
-n <- length(identifiants)  # number of identifiers
-p <- length(dates)         # number of retained dates
-
-# Number of lines to read from the large file on each pass of the reading
-# loop: four chunks on "mojarrita", a single chunk elsewhere.
-blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011
-
-# Fit of the clustering: loading this file is expected to define `res`,
-# which contains the clustering memberships.
-load("../res/clfitdf200.Rdata")
-res <- as.data.frame(res)
-
-# Permute every column of `res` independently.
-resRDN <- as.data.frame(lapply(res, sample))
-# save(file = "../res/clfitdf200RDN.Rdata", resRDN)
-# NOTE(review): presumably this file holds a previously saved `resRDN`, in
-# which case it replaces the permutation drawn just above -- confirm.
-load("../res/clfitdf200RDN.Rdata")
-
-lres <- ncol(res)  # number of clusterings (columns of `res`)
-K <- 200           # nrow(clfit$clusinfo)
-rm(res)
-
-## 2. Process the large file ####
-
-# Column sums of `block_data` for each cluster label 1..n_clusters.
-#   block_data : numeric matrix, one row per series of the current chunk.
-#   membership : cluster label of each row of `block_data`, in row order.
-#   n_clusters : number of clusters.
-# Returns an n_clusters x ncol(block_data) matrix. A cluster with no member
-# in the chunk yields a row of zeros; NA values propagate into the sums.
-sum_block_by_cluster <- function(block_data, membership, n_clusters) {
-  sums <- matrix(0, nrow = n_clusters, ncol = ncol(block_data))
-  for (k in seq_len(n_clusters)) {
-    members <- which(membership == k)
-    # drop = FALSE keeps a matrix for 0 or 1 member, so colSums always works.
-    sums[k, ] <- colSums(block_data[members, , drop = FALSE])
-  }
-  sums
-}
-
-# One K x p accumulator per clustering (column of resRDN). The per-chunk
-# sums are added to it, so the result covers the whole file and not only
-# the last chunk read.
-synchros <- lapply(resRDN, function(ll) matrix(0, nrow = K, ncol = p))
-
-con <- file("~/tmp/2009_full.txt") # Establish a connection to the file
-open(con, "r")                     # Open the connection
-row_offset <- 0                    # number of rows read in earlier chunks
-
-tryCatch({
-  for (b in seq_along(blocks)) { # Reading loop
-    actual <- readLines(con = con, n = blocks[b])
-    if (length(actual) == 0) {
-      break # nothing left to read
-    }
-    actual_split <- strsplit(actual, " ")
-    rm(actual)
-
-    # The first field of each line is a label, the remaining fields are the
-    # values. Fields that as.numeric() cannot parse become NA.
-    auxlist <- lapply(actual_split, function(x) as.numeric(x[-1]))
-    datamat <- matrix(unlist(auxlist), ncol = p, byrow = TRUE)
-    rm(auxlist)
-
-    # Row names are characters 2 to 7 of the first field.
-    auxnames <- vapply(actual_split, `[`, character(1), 1)
-    rownames(datamat) <- substr(auxnames, 2, 7)
-    rm(actual_split)
-
-    # Positions of this chunk's rows in the full membership vectors.
-    # Assumes line i of the file corresponds to element i of each column of
-    # resRDN, as the positional indexing of the original code did -- confirm.
-    block_rows <- row_offset + seq_len(nrow(datamat))
-
-    synchros <- Map(
-      function(total, ll) {
-        total + sum_block_by_cluster(datamat, ll[block_rows], K)
-      },
-      synchros, resRDN
-    )
-
-    row_offset <- row_offset + nrow(datamat)
-  }
-}, finally = close(con)) # close connection to the file, even on error
-
-# save(synchros, file = "~/tmp/2009synchrosdf200RND")
-