complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 03_compute-sums-of-classes_2010.r
## File : 03_compute-sums-of-classes_2010.r
## Description : Compute the synchrones for each group obtained by the
##               clustering.

# Start from an empty workspace and work relative to the project code folder.
rm(list = ls())

setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

# Only the first column of each auxiliary file is used.
identifiants <- read.table("identifs.txt")[[1]]
dates0 <- read.table("datesall.txt")[[1]]

# Keep the entries of dates0 whose text contains "2010".
dates <- dates0[grepl("2010", dates0)]
rm(dates0)

n <- length(identifiants)
p <- length(dates)

# Number of lines consumed from the data file at each pass of the reading loop.
blocks <- c(6500, 6500, 6500, 5511)

# Fit of the clustering : clfit
# NOTE(review): both .Rdata files are loaded back to back; if each stores an
# object named `clfit`, the second load replaces the first -- confirm.
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
# table(clfit$clustering)

# One row per clustered individual; the row names are whatever names
# clfit$clustering carries and are used later to look up cluster membership.
dfclust <- data.frame(cluster = clfit$clustering)
# read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random

# K = number of clusters (one row of clusinfo per cluster).
K <- nrow(clfit$clusinfo)
#dfclust <- head(dfclust, 50) # just for testing purposes

# Accumulator: row k receives the summed series of cluster k (p columns).
synchros <- matrix(0, nrow = K, ncol = p)
rm(clfit)
34
## 2. Process the large file ####

# Stream the data file in chunks of blocks[b] lines and, for every cluster k,
# add the column-wise sum of the rows assigned to k into synchros[k, ].
# Reads from the script's workspace: blocks, p, K, dfclust, synchros.
# Leaves `datamat` and `clustfactor` of the last chunk in the workspace.

# Open the connection directly in read mode. The earlier version called
# close(con) before `con` was created, which fails when the workspace has
# just been cleared.
con <- file("~/tmp/2010_full.txt", open = "r")

tryCatch({
  for (b in seq_along(blocks)) { # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    if (length(actual) == 0) {
      break # the file ended before all announced blocks were read
    }

    # Each line is expected to hold an identifier followed by p values.
    fields <- strsplit(actual, " ", fixed = TRUE)
    if (any(lengths(fields) != p + 1)) {
      stop("block ", b, ": every line must contain ", p + 1,
           " space-separated fields", call. = FALSE)
    }
    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
    rm(actual, fields)

    # Convert the value columns in one shot; building the matrix explicitly
    # keeps a single-line chunk as a 1 x p matrix.
    datamat <- matrix(as.numeric(auxmat[, -1]), nrow = nrow(auxmat))
    # Characters 2 to 7 of the first field serve as the row identifier.
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # obtain for each line of datamat the cluster membership (NA if none)
    clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]

    for (k in seq_len(K)) {
      clustk <- which(clustfactor == k) # which() drops the NA memberships
      if (length(clustk) > 0) {
        # drop = FALSE makes colSums valid even for a single member row.
        synchros[k, ] <- synchros[k, ] +
          colSums(datamat[clustk, , drop = FALSE])
      }
    }
  }
}, finally = close(con)) # always release the connection, even on error

# One column per cluster (p rows each) plus `total`, the sum over clusters.
synchros <- data.frame(t(synchros), total = colSums(synchros))

# write.table(synchros, file = "~/tmp/2010_synchros200RC.txt")
# write.table(synchros, file = "~/tmp/2010_synchros200-random.txt")
#
#
78
79
# Recompute the per-cluster sums for the clusterings stored in columns 1 to 10
# of clfit200muchos.txt and write one result file per clustering.
# Reads from the script's workspace: datamat, p, K (and tdata, see below).
dfclust <- read.table("clfit200muchos.txt")

# NOTE(review): `tdata` is never created in this script; its row names are
# used to map rows of datamat onto rows of dfclust. Fail with an explicit
# message instead of an opaque "object not found" error.
if (!exists("tdata")) {
  stop("object 'tdata' is required to align datamat with dfclust ",
       "but is not defined", call. = FALSE)
}

# NOTE(review): `datamat` holds only the last chunk read by the reading loop,
# so the sums below cover that chunk only -- confirm this is intended.

# The row alignment does not depend on the clustering; compute it once.
row_in_dfclust <- match(rownames(datamat), rownames(tdata))

for (clustering_col in 1:10) {
  synchros <- matrix(0, ncol = p, nrow = K)
  clustfactor <- dfclust[row_in_dfclust, clustering_col]

  for (k in seq_len(K)) {
    clustk <- which(clustfactor == k) # which() drops the NA memberships
    if (length(clustk) > 0) {
      # drop = FALSE makes colSums valid even for a single member row.
      synchros[k, ] <- colSums(datamat[clustk, , drop = FALSE])
    }
  }

  # One column per cluster (p rows each) plus `total`, the sum over clusters.
  synchros <- data.frame(t(synchros), total = colSums(synchros))
  write.table(
    synchros,
    file = paste0(colnames(dfclust)[clustering_col], "2010.txt")
  )
}
101