1 ## File : 03_compute-sum-of-classes_2010-par.r
2 ## Description : Calculer les synchrones pour chaque groupe obtenu par le
# Host flag: TRUE when the 4th field of Sys.info() (the machine's node name)
# equals "mojarrita".
7 MOJARRITA <- Sys.info()[4] == "mojarrita"
# Move to the project's codes/ directory so the relative paths used below
# resolve.
# NOTE(review): as shown here both setwd() calls run unconditionally and
# MOJARRITA is never consulted; presumably these were the two branches of an
# if/else on MOJARRITA -- confirm against the complete script.
10 setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
12 setwd("~/2014_EDF-Orsay-Lyon2/codes/")
17 ## 1. Read auxiliary data files ####
# First column of identifs.txt (path is relative to the working directory).
19 identifiants <- read.table("identifs.txt")[ ,1]
# First column of datesall.txt.
20 dates0 <- read.table("datesall.txt")[, 1]
# Keep only the entries of dates0 that match the pattern "2010".
21 dates <- dates0[grep("2010", dates0)]
# Number of identifiers read above.
24 n <- length(identifiants)
# Chunk sizes: three blocks of 6500 followed by one of 5511 (25011 in total).
# The reading loop in section 2 iterates over this vector.
# NOTE(review): presumably each value is a number of lines to read per pass
# -- confirm against the loop body.
29 blocks <- c(rep(6500, 3), 5511)
34 # Fit of the clustering: clfit
35 load('../res/clfitdf200.Rdata') # Loads `res`, which contains
36 # the clustering memberships
# Coerce res to a data frame so that lapply(res, ...) below walks it
# column by column.
37 res <- as.data.frame(res)
# Number of clusters, hard-coded to 200; the commented-out alternative would
# derive it from clfit.
41 K <- 200 #nrow(clfit$clusinfo)
43 ## 2. Process the large file ####
# The file is read through a connection so that successive readLines() calls
# continue from where the previous one stopped.
46 con <- file("~/tmp/2010_full.txt") # Establish a connection to the file
47 open(con, "r") # Open the connection
# One pass per entry of blocks.
# NOTE(review): the closing brace of this loop is not among the visible lines.
49 for(b in seq_along(blocks)){ # Reading loop
# Read the next nb lines from the open connection.
# NOTE(review): nb is not assigned in the visible lines; presumably
# nb <- blocks[b] -- confirm.
51 actual <- readLines(con = con, n = nb )
# Split every line on single spaces and lay the pieces out as a character
# matrix with p + 1 columns, one row per line read.
# NOTE(review): p is not defined in the visible lines -- confirm where it is set.
52 auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
# Drop the first column and convert the rest to numeric row by row; apply()
# returns its results column-wise, hence the t() to get one row per line again.
55 datamat <- t(apply(auxmat[, -1], 1, as.numeric)) # the NA introduced by as.numeric
# Row names are characters 2 to 7 of the first column of auxmat.
57 rownames(datamat) <- substr(auxmat[, 1], 2, 7)
# Apply the computation below to each column of res.
# NOTE(review): the function header (presumably function(ll)), the loop over k
# and the closing braces/parentheses are not among the visible lines. As shown,
# synchros is reassigned on every pass of the reading loop; check whether the
# complete script accumulates the per-block results.
60 synchros <- lapply(res,
# K x p matrix of zeros, filled one row per cluster below.
62 aux <- matrix(0, ncol = p, nrow = K)
# Positions where ll equals k.
# NOTE(review): ll == k is used to index rows of datamat, so ll presumably
# has one entry per row of datamat -- confirm.
64 clustk <- which(ll == k)
# More than one member: row k is the column-wise sum over the member rows.
65 if(length(clustk) > 1) {
66 aux[k, ] <- colSums(datamat[ll == k, ])
# Direct assignment of the selected row(s).
# NOTE(review): presumably the else branch for a single member, where the
# subset is already a vector; an empty cluster would also reach it -- confirm.
68 aux[k, ] <- datamat[ll == k, ]
75 close(con) # close connection to the file
# Saving of the result is currently disabled.
77 # save(synchros, file = "~/tmp/2010synchrosdf200WER")