complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 03_compute-sums-of-classes-par_2009.r
CommitLineData
ad642dc6
BA
## File : 03_compute-sum-of-classes_2009.r
## Description : Compute the synchrones for each group obtained by the
##               clustering.

# Start from an empty workspace: every object of the current environment is
# removed before anything else runs.
rm(list = ls())

# TRUE when the script runs on the host whose node name is "mojarrita";
# this flag selects the machine-specific settings of the script.
MOJARRITA <- Sys.info()["nodename"] == "mojarrita"

# The project directory is located differently on each machine; the relative
# paths used by the script are resolved from this working directory.
setwd(
  if (MOJARRITA) {
    "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
  } else {
    "~/2014_EDF-Orsay-Lyon2/codes/"
  }
)
14
15
## 1. Read auxiliary data files ####

# First column of each auxiliary file: the identifiers and the full list of
# dates.
identifiants <- read.table("identifs.txt")[[1]]
all_dates <- read.table("datesall.txt")[[1]]

# Keep only the dates whose text contains "2009".
dates <- all_dates[grepl("2009", all_dates)]
rm(all_dates)

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of retained dates

# Number of lines requested at each pass over the large data file: four
# smaller reads on "mojarrita", a single read of 25011 lines elsewhere
# (both settings add up to 25011 lines).
blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011

# Fit of the clustering : clfit
# load() is expected to create `res`, which contains the clustering
# memberships.
load("../res/clfitdf200.Rdata")
res <- as.data.frame(res)

lres <- ncol(res)  # number of columns of `res`
K <- 200           # nrow(clfit$clusinfo)
39
40
## 2. Process the large file ####

# For every clustering (column of `res`), build the K x p matrix whose row k
# holds the column sums of the data rows assigned to cluster k. The data file
# is streamed in chunks of `blocks[b]` lines and the sums are accumulated
# across chunks, so the result does not depend on how the file is split.
#
# Relies on `res`, `K`, `p` and `blocks` being defined earlier in the script.
# Assumes the rows of `res` are in the same order as the lines of the data
# file -- TODO confirm against the code that produced `res`.

# One zero-filled accumulator per clustering; lapply() keeps the column names
# of `res` as the names of `synchros`.
synchros <- lapply(res, function(ll) matrix(0, nrow = K, ncol = p))

# Open the connection directly in read mode. There is deliberately no
# close(con) before this point: no connection exists yet in a fresh session.
con <- file("~/tmp/2009_full.txt", open = "r")
offset <- 0L  # number of file lines consumed by the previous blocks

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    actual <- readLines(con = con, n = blocks[b])
    nb <- length(actual)  # can be smaller than requested at end of file
    if (nb == 0L) {
      break
    }

    # Each line is an identifier followed by p space-separated values.
    auxmat <- matrix(unlist(strsplit(actual, " ", fixed = TRUE)),
                     ncol = p + 1, byrow = TRUE)
    rm(actual)
    # A malformed line would silently shift every following field.
    stopifnot(nrow(auxmat) == nb)

    # The NAs introduced by as.numeric() come from NA strings in the file.
    # drop = FALSE keeps a one-line block as a matrix.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]), nrow = nb)
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # Positions, within the whole file, of the rows held in this block; used
    # to pick the matching slice of each membership vector.
    block_rows <- offset + seq_len(nb)

    for (j in seq_along(res)) {
      membership <- res[[j]][block_rows]
      block_sums <- matrix(0, nrow = K, ncol = p)
      for (k in seq_len(K)) {
        members <- which(membership == k)
        # Clusters without any row in this block contribute nothing; with
        # drop = FALSE a single-row cluster needs no special case.
        if (length(members) > 0L) {
          block_sums[k, ] <- colSums(datamat[members, , drop = FALSE])
        }
      }
      synchros[[j]] <- synchros[[j]] + block_sums
    }

    offset <- offset + nb
  }
}, finally = close(con))  # close the connection even if a block fails

# save(synchros, file = "~/tmp/2009synchrosdf200WER")
76