1 ## File : 03_compute-sum-of-classes_2010.r
2 ## Description : Calculer les synchrones pour chaque groupe obtenu par le
# Setup: set the working directory, read the identifier and date lists, load
# a saved clustering fit, build the row -> cluster table `dfclust`, replace
# its labels by a shuffled version, and allocate the accumulator `synchros`.
# NOTE(review): the leading integers on each line (7, 9, 11, ...) look like
# line numbers left over from a numbered listing, and the gaps between them
# suggest some original lines are not present here -- confirm against the
# original script before running.
# NOTE(review): setwd() makes the relative paths below depend on this
# machine's directory layout.
7 setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
9 ## 1. Read auxiliary data files ####
# Only the first column of each file is kept.
11 identifiants <- read.table("identifs.txt")[ ,1]
12 dates0 <- read.table("datesall.txt")[, 1]
# Keep the entries of dates0 that contain the substring "2009".
13 dates <- dates0[grep("2009", dates0)]
16 n <- length(identifiants)
# Four chunk sizes: 3 x 6500 + 5511 = 25011.
# NOTE(review): presumably the number of lines to read per pass of the
# reading loop (which iterates over seq_along(blocks)) -- confirm.
19 blocks <- c(rep(6500, 3), 5511)
21 # Fit of the clustering : clfit
22 #load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
# load() restores the objects saved in the file; it is expected to provide
# `clfit`, which is used just below.
23 load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
24 #load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
25 # table(clfit$clustering)
# One row per element of clfit$clustering, holding its cluster label.
# NOTE(review): `clustering` / `clusinfo` look like components of a
# pam/clara fit from the cluster package -- TODO confirm.
27 dfclust <- data.frame(cluster = clfit$clustering) # real clustering
# K is the number of rows of clfit$clusinfo (presumably one per cluster).
28 K <- nrow(clfit$clusinfo)
# Overwrites the real labels with a shuffled vector: table(dfclust) counts the
# rows per label, Map(rep, 1:200, counts) repeats label i counts[i] times,
# and sample() permutes the result. When the labels are exactly 1..200 this
# keeps the cluster sizes but randomises membership.
# NOTE(review): 200 is hard-coded rather than taken from K, and no
# set.seed() is visible, so the shuffle is not reproducible as shown.
31 dfclust$cluster <- sample(unlist(Map(rep, 1:200, table(dfclust)))) # random clust
35 #dfclust <- head(dfclust, 50) # just for testing purposes
# K x p matrix of zeros; row k accumulates the sums for cluster k.
# NOTE(review): `p` is not assigned anywhere in the visible code -- confirm
# where it is defined (the defining line may be missing).
36 synchros <- matrix(0, ncol = p, nrow = K)
39 ## 2. Process the large file ####
# Reads the data file in successive chunks and, for each chunk, adds the
# column-wise sums of the rows belonging to cluster k to synchros[k, ].
# NOTE(review): the leading integers on each line look like leftover listing
# line numbers, and several statements appear to be missing (see notes
# below) -- confirm against the original script.
# NOTE(review): if an error occurs inside the loop, close(con) at the end is
# never reached and the connection stays open.
42 con <- file("~/tmp/2010_full.txt") # Establish a connection to the file
43 open(con, "r") # Open the connection
# One pass per entry of `blocks`. Because the connection stays open, each
# readLines() call continues where the previous one stopped.
45 for(b in seq_along(blocks)){ # Reading loop
# NOTE(review): `nb` is not assigned in the visible code; presumably
# blocks[b] -- confirm.
47 actual <- readLines(con = con, n = nb )
# Split every line on single spaces and reshape to p + 1 columns, one row
# per input line; this assumes each line has exactly p + 1 fields.
48 auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
# Convert all columns but the first to numeric. apply() over rows returns
# the converted rows as columns, hence the t().
51 datamat <- t(apply(auxmat[, -1], 1, as.numeric))
# Row names are characters 2 to 7 of the first field of each line.
52 rownames(datamat) <- substr(auxmat[, 1], 2, 7)
55 # obtain for each line of datamat the cluster membership (if any)
# match() yields NA for rows whose name is absent from rownames(dfclust), so
# clustfactor is NA for those rows.
56 clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]
58 # if(length(which(rownames(datamat) == "218095"))>0) print(b);
# NOTE(review): `k` is not assigned in the visible code; presumably the
# statements below sit inside a loop over the clusters 1..K -- confirm.
# which() drops the NA comparisons, so only matched rows are selected.
61 clustk <- which(clustfactor == k)
62 if(length(clustk) > 0) {
# Several member rows in this chunk: sum them column by column.
64 if(length(clustk) > 1) {
65 synchrosk <- colSums(datamat[which(clustfactor == k), ])
# Single member row: [i, ] already returns a plain vector, so no colSums().
# NOTE(review): as shown this assignment overrides the one above; presumably
# it belongs to an `else` branch whose line is not visible -- confirm.
67 synchrosk <- datamat[which(clustfactor == k), ]
# Add this chunk's contribution to the running total of cluster k.
69 synchros[k, ] <- synchros[k, ] + synchrosk
# NOTE(review): the closing braces of the two if-blocks and of the reading
# loop are not visible here.
75 close(con) # close connection to the file
# Reshape and save the results.
# t(synchros) puts one cluster per column; `total` is colSums() of the
# original K-row matrix, i.e. the sum over all clusters for each column.
77 synchros <- data.frame(t(synchros), total = colSums(synchros))
# NOTE(review): the output name says 2009 while the input file read earlier
# is named 2010_full.txt -- confirm which year is intended.
79 write.table(synchros, file = "~/tmp/2009_synchros200RANDOM.txt")
# Saves the cluster table (labels as modified earlier in the script); the
# relative path resolves against the current working directory.
80 write.table(dfclust, file = "../res/dfclust200RANDOM.txt")
81 #write.table(synchros, file = "~/tmp/2009_synchros200RC.txt")