Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : 03_compute-sum-of-classes_2010.r |
2 | ## Description : Compute the synchrones (per-cluster sums) for each group obtained by the | |
3 | ## clustering. | |
4 | ||
5 | rm(list = ls()) | |
6 | ||
7 | setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/") | |
8 | ||
9 | ## 1. Read auxiliary data files #### | |
10 | ||
11 | identifiants <- read.table("identifs.txt")[ ,1] | |
12 | dates0 <- read.table("datesall.txt")[, 1] | |
13 | dates <- dates0[grep("2010", dates0)] | |
14 | rm(dates0) | |
15 | ||
16 | n <- length(identifiants) | |
17 | p <- length(dates) | |
18 | ||
19 | blocks <- c(rep(6500, 3), 5511) | |
20 | ||
21 | # Fit of the clustering : clfit (presumably the object restored by the load() calls below -- confirm) | |
22 | #load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata') | |
23 | load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata') | |
24 | load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata') | |
25 | # table(clfit$clustering) | |
26 | ||
27 | dfclust <- data.frame(cluster = clfit$clustering) | |
28 | # read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random | |
29 | ||
30 | K <- nrow(clfit$clusinfo) | |
31 | #dfclust <- head(dfclust, 50) # just for testing purposes | |
32 | synchros <- matrix(0, ncol = p, nrow = K) | |
33 | rm(clfit) | |
35 | ## 2. Process the large file #### | |
36 | ## Stream ~/tmp/2010_full.txt in chunks of blocks[b] lines and add, for every cluster k, | |
37 | ## the column sums of its member rows to synchros[k, ]. | |
38 | ## NOTE: the close(con) that used to precede file() was removed; `con` does not exist yet at this | |
39 | ## point (the workspace is wiped at the top of the script), so that call stopped with an error. | |
40 | con <- file("~/tmp/2010_full.txt") # Establish a connection to the file | |
41 | open(con, "r") # Open the connection | |
42 | ||
43 | for(b in seq_along(blocks)){ # Reading loop | |
44 | nb <- blocks[b] | |
45 | actual <- readLines(con = con, n = nb ) | |
46 | # every line is split on single spaces and must yield exactly p + 1 fields | |
47 | auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE) | |
48 | rm(actual) | |
49 | ||
50 | # columns 2..p+1 are converted to numeric; characters 2-7 of column 1 become the row names | |
51 | datamat <- t(apply(auxmat[, -1], 1, as.numeric)) | |
52 | rownames(datamat) <- substr(auxmat[, 1], 2, 7) | |
53 | rm(auxmat) | |
54 | ||
55 | # obtain for each line of datamat the cluster membership (NA when the row name is not in dfclust) | |
56 | clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))] | |
57 | ||
58 | for(k in seq_len(K)){ | |
59 | clustk <- which(clustfactor == k) # which() also discards the NA memberships | |
60 | if(length(clustk) > 0) { | |
61 | # drop = FALSE keeps a matrix even for a single member, so colSums() covers both cases | |
62 | synchros[k, ] <- synchros[k, ] + colSums(datamat[clustk, , drop = FALSE]) | |
63 | } | |
64 | } | |
65 | } | |
66 | ||
67 | close(con) # close connection to the file | |
68 | ||
69 | # transpose so that each cluster becomes a column (one row per column of synchros), | |
70 | # and append a "total" column holding the sum over all clusters | |
71 | ||
72 | synchros <- data.frame(t(synchros), total = colSums(synchros)) | |
73 | ||
74 | # write.table(synchros, file = "~/tmp/2010_synchros200RC.txt") | |
75 | # write.table(synchros, file = "~/tmp/2010_synchros200-random.txt") | |
76 | # | |
77 | # | |
78 | ||
79 | ||
80 | dfclust <- read.table("clfit200muchos.txt") | |
81 | # Repeat the per-cluster sums for columns 1 to 10 of dfclust, writing one file per column. | |
82 | for(pepe in 1:10) { | |
83 | # NOTE(review): datamat is only the last chunk left by the reading loop, and tdata is not | |
84 | # created in this script (presumably restored by load()) -- confirm both are intended. | |
85 | synchros <- matrix(0, nrow = K, ncol = p) | |
86 | clustfactor <- dfclust[match(rownames(datamat), rownames(tdata)), pepe] | |
87 | for(k in 1:K){ | |
88 | clustk <- which(clustfactor == k) | |
89 | if(length(clustk) == 0) next # nobody in cluster k: row k keeps its zeros | |
90 | # one member is taken as is, several members are summed column-wise | |
91 | synchrosk <- if(length(clustk) > 1) colSums(datamat[clustk, ]) else datamat[clustk, ] | |
92 | synchros[k, ] <- synchros[k, ] + synchrosk | |
93 | rm(synchrosk) | |
94 | } | |
95 | ||
96 | # transpose so that each cluster becomes a column, then append the overall total | |
97 | synchros <- data.frame(t(synchros), total = colSums(synchros)) | |
98 | # output name: the column name of dfclust followed by "2010.txt" | |
99 | write.table(synchros, file = paste0(colnames(dfclust)[pepe], "2010.txt")) | |
100 | } | |
101 |