Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : 03_compute-sum-of-classes_2010-par.r |
2 | ## Description : Compute the synchrones (per-cluster sums of the curves) for | |
3 | ## each group obtained by the clustering. | |
4 | ||
# Start from an empty workspace: every object listed by ls() is removed.
rm(list = ls())

# Flag telling whether the script runs on the host named "mojarrita".
MOJARRITA <- Sys.info()["nodename"] == "mojarrita"

# The working directory depends on the host the script runs on.
setwd(
  if (MOJARRITA) {
    "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
  } else {
    "~/2014_EDF-Orsay-Lyon2/codes/"
  }
)
14 | ||
15 | ||
16 | ||
## 1. Read auxiliary data files ####

# Only the first column of each auxiliary file is used.
identifiants <- read.table("identifs.txt")[, 1]
dates0 <- read.table("datesall.txt")[, 1]

# Keep the dates whose text contains "2010", then drop the full list.
dates <- dates0[grepl("2010", dates0)]
rm(dates0)

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of retained dates

# Number of lines read from the large file at each pass.  Both settings add up
# to 25011 lines: four passes on "mojarrita", a single pass elsewhere.
blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011

# Fit of the clustering: loads `res`, which contains the clustering
# memberships.
load("../res/clfitdf200.Rdata")
res <- as.data.frame(res)

lres <- length(res)  # number of columns of `res`
K <- 200  # nrow(clfit$clusinfo)
42 | ||
## 2. Process the large file ####

# Open the large data file for sequential, block-wise reading.  No close() is
# attempted beforehand: `con` does not exist yet on a fresh run, so closing it
# first aborts the script with "object 'con' not found".
con <- file("~/tmp/2010_full.txt", open = "r")

# One K x p accumulator per column of `res`; each block read from the file adds
# its contribution, so the result no longer depends on how `blocks` splits the
# file.
synchros <- lapply(res, function(ll) matrix(0, nrow = K, ncol = p))

# Number of file lines consumed so far.  Assumes the i-th line of the file
# corresponds to the i-th entry of every membership vector in `res`, as the
# single-block version of this code already did -- TODO confirm.
offset <- 0

tryCatch(
  {
    for (b in seq_along(blocks)) {  # Reading loop
      nb <- blocks[b]
      actual <- readLines(con = con, n = nb)
      nread <- length(actual)
      if (nread == 0) {
        break  # file exhausted before all blocks were read
      }

      # Each line holds p + 1 space-separated fields: an identifier followed
      # by p values.
      auxmat <- matrix(unlist(strsplit(actual, " ")),
                       ncol = p + 1, byrow = TRUE)
      rm(actual)

      # Convert the value columns in one vectorised call; "NA" strings become
      # NA.  drop = FALSE keeps a matrix even for a one-line block.
      datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                        nrow = nrow(auxmat))
      rownames(datamat) <- substr(auxmat[, 1], 2, 7)
      rm(auxmat)

      # Positions, within the membership vectors, of the lines just read.
      rows <- offset + seq_len(nread)

      for (j in seq_along(synchros)) {
        memb <- res[[j]][rows]
        acc <- synchros[[j]]
        for (k in seq_len(K)) {
          # which() ignores NA memberships; an empty cluster adds nothing
          # instead of failing on a zero-length replacement.
          clustk <- which(memb == k)
          if (length(clustk) > 0) {
            acc[k, ] <- acc[k, ] + colSums(datamat[clustk, , drop = FALSE])
          }
        }
        synchros[[j]] <- acc
      }

      offset <- offset + nread
    }
  },
  finally = close(con)  # close the connection even when a block fails
)

# Flag a mismatch between the file and the clustering memberships.
if (offset != nrow(res)) {
  warning("Read ", offset, " lines from the data file but `res` has ",
          nrow(res), " rows.", call. = FALSE)
}
76 | ||
77 | # save(synchros, file = "~/tmp/2010synchrosdf200WER") | |
78 | ||
79 |