complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 03_compute-sums-of-classes_2010-par.r
CommitLineData
ad642dc6
BA
## File : 03_compute-sums-of-classes_2010-par.r
## Description : Compute the synchrones (per-cluster sums of the series) for
##               each group obtained by the clustering.
4
# Start from an empty workspace so that nothing left over from a previous
# session can leak into this run.
rm(list = ls())

# TRUE when the script runs on the machine whose node name is "mojarrita".
# The flag selects the machine-specific working directory here and is read
# again later in the script.
MOJARRITA <- Sys.info()["nodename"] == "mojarrita"

# Relative paths used by the rest of the script are resolved against the
# project's code directory, whose location depends on the machine.
setwd(
  if (MOJARRITA) {
    "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
  } else {
    "~/2014_EDF-Orsay-Lyon2/codes/"
  }
)
14
15
16
## 1. Read auxiliary data files ####

# Identifiers: first column of identifs.txt.
identifiants <- read.table("identifs.txt")[[1]]

# Dates: first column of datesall.txt, restricted to the entries that
# contain "2010".
all_dates <- read.table("datesall.txt")[[1]]
dates <- all_dates[grepl("2010", all_dates)]
rm(all_dates)

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of retained dates

# Number of lines of the large data file to read per pass: several smaller
# passes on "mojarrita", a single pass elsewhere.
blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011

# Fit of the clustering: loading this file creates `res`, which contains the
# clustering memberships.
load("../res/clfitdf200.Rdata")
res <- as.data.frame(res)  # one column per clustering

lres <- length(res)  # number of columns of `res`
K <- 200             # number of clusters; was nrow(clfit$clusinfo)
42
## 2. Process the large file ####

# Sum the rows of `datamat` that belong to each of the clusters 1..K.
#
# datamat : numeric matrix, one row per series.
# labels  : cluster label of each row of `datamat`, in the same order.
# K       : number of clusters.
#
# Returns a K x ncol(datamat) matrix whose k-th row is the column-wise sum of
# the rows labelled k (all zeros when no row carries that label).
sum_rows_by_cluster <- function(datamat, labels, K) {
  sums <- matrix(0, nrow = K, ncol = ncol(datamat))
  for (k in seq_len(K)) {
    members <- which(labels == k)
    # drop = FALSE keeps a matrix for zero or one member, so colSums() covers
    # every cluster size without a special case (an empty cluster used to
    # fail with "replacement has length zero").
    sums[k, ] <- colSums(datamat[members, , drop = FALSE])
  }
  sums
}

# One K x p accumulator per clustering (i.e. per column of `res`). The sums
# are accumulated over all blocks instead of being overwritten by each block.
synchros <- lapply(res, function(ll) matrix(0, nrow = K, ncol = p))

# Open a fresh read connection. No close() beforehand: `con` does not exist
# yet at this point, so closing it aborted the script.
con <- file("~/tmp/2010_full.txt", open = "r")
rows_done <- 0L  # number of file rows consumed by the previous blocks

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    block_lines <- readLines(con = con, n = blocks[b])
    if (length(block_lines) == 0L) {
      break  # file exhausted before all blocks were read
    }

    # Each line holds an identifier followed by p space-separated values.
    fields <- matrix(unlist(strsplit(block_lines, " ")),
                     ncol = p + 1, byrow = TRUE)
    rm(block_lines)

    # The NA introduced by as.numeric are "NA" strings.
    datamat <- matrix(as.numeric(fields[, -1, drop = FALSE]),
                      nrow = nrow(fields))
    rownames(datamat) <- substr(fields[, 1], 2, 7)
    rm(fields)

    # Memberships of the rows held in this block. Assumes the rows of the
    # file come in the same order as the membership vectors in `res`, which
    # the single-block configuration already relies on.
    block_rows <- rows_done + seq_len(nrow(datamat))
    synchros <- Map(
      function(acc, ll) {
        acc + sum_rows_by_cluster(datamat, ll[block_rows], K)
      },
      synchros, res
    )
    rows_done <- rows_done + nrow(datamat)
  }
}, finally = close(con))  # close the connection even if a block fails
76
77# save(synchros, file = "~/tmp/2010synchrosdf200WER")
78
79