complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / 03_compute-sums-of-classes_2009.r
CommitLineData
ad642dc6
BA
## File : 03_compute-sum-of-classes_2009.r
## Description : Compute the synchrones (per-cluster sums of the curves)
##               for each group produced by the clustering.

# Start from an empty workspace; every object used below is rebuilt here.
rm(list = ls())

# All relative paths in this script are resolved from this directory.
setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

# First column of each auxiliary file: identifiers and the full date list.
identifiants <- read.table("identifs.txt")[[1]]
all_dates <- read.table("datesall.txt")[[1]]

# Keep only the entries whose text contains "2009".
dates <- all_dates[grepl("2009", all_dates)]
rm(all_dates)

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of retained dates (values per curve)

# Number of lines to read from the large data file on each pass
# (three passes of 6500 lines, then a last one of 5511).
blocks <- rep(c(6500, 5511), times = c(3, 1))
20
# Clustering fit: loading the .Rdata file is expected to define `clfit`.
# Alternative fits, kept for reference:
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
load('../res/clfit200RC.Rdata')
# table(clfit$clustering)

# Shuffle the labels attached to the cluster assignments so that the classes
# become random: cluster sizes are kept, membership is not.
clfit$clustering <- setNames(clfit$clustering, sample(names(clfit$clustering)))

# One row per identifier (row names) with its cluster number.
dfclust <- data.frame(cluster = clfit$clustering)
# write.table(dfclust, file = "../res/clfit200-random.txt")

# Number of clusters, taken from the per-cluster summary table of the fit.
K <- dim(clfit$clusinfo)[1L]
#dfclust <- head(dfclust, 50) # just for testing purposes

# Accumulator for the per-cluster sums: one row per cluster, one column per
# retained date.
synchros <- matrix(0, nrow = K, ncol = p)

# The fit itself is no longer needed once dfclust and K are extracted.
rm(clfit)
35
## 2. Process the large file ####

# The data file is read in chunks of blocks[b] lines. Each line is split on
# single spaces into p + 1 fields: a first field carrying the identifier,
# followed by p numeric values. The rows of each chunk are added to the
# running sum of the cluster they belong to (synchros[k, ]).
#
# No close(con) is issued before the connection is created: `con` does not
# exist at this point (the workspace is cleared at the top of the script), so
# such a call stops the script with "object 'con' not found". The connection
# is instead always released by the `finally` clause below, even when a block
# fails, so no stale connection is left behind for the next run.

con <- file("~/tmp/2009_full.txt", open = "r")  # read-only text connection

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)

    # Fewer lines in the file than announced in `blocks`: stop reading with an
    # explicit message rather than failing inside matrix() on empty input.
    if (length(actual) == 0) {
      warning("input file exhausted before block ", b, call. = FALSE)
      break
    }

    # One row per line read, p + 1 character fields per row.
    auxmat <- matrix(unlist(strsplit(actual, " ", fixed = TRUE)),
                     ncol = p + 1, byrow = TRUE)
    rm(actual)

    # Numeric part of the chunk; converting the storage mode keeps the matrix
    # shape (also when the chunk holds a single row).
    datamat <- auxmat[, -1, drop = FALSE]
    storage.mode(datamat) <- "double"

    # Row names are characters 2 to 7 of the first field
    # (presumably a quoted 6-character identifier -- TODO confirm).
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # obtain for each line of datamat the cluster membership (NA if none)
    clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]

    # if(length(which(rownames(datamat) == "218095"))>0) print(b);

    # TODO: move to C, this loop is heavy
    for (k in seq_len(K)) {
      # which() drops the NA entries of rows that belong to no cluster.
      clustk <- which(clustfactor == k)
      if (length(clustk) > 0) {
        # drop = FALSE keeps a matrix even for a single member, so colSums()
        # covers both the one-row and the many-rows case.
        synchros[k, ] <- synchros[k, ] +
          colSums(datamat[clustk, , drop = FALSE])
      }
    }
  }
}, finally = close(con))  # close connection to the file
75
# Final table: one row per date, one column per cluster, plus a `total`
# column holding the sum over all clusters for that date.
total_per_date <- colSums(synchros)
synchros <- data.frame(t(synchros), total = total_per_date)
rm(total_per_date)

# write.table(synchros, file = "~/tmp/2009_synchros200RC.txt")
# write.table(synchros, file = "~/tmp/2009_synchros200-random.txt")
80