Commit | Line | Data |
---|---|---|
ad642dc6 BA |
## File        : 01_extract-features_2010.r
## Description : From the full data matrix, extract convenient features to
##               cluster on.

# Remove every object from the current workspace before starting.
rm(list = ls())

# Load helper code from a remote script.
# NOTE(review): toDWT() and contrib(), called later in this file, are not
# defined here; presumably they come from this sourced script -- confirm.
source("http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r")

# The relative paths read below ("identifs.txt", "datesall.txt") are resolved
# against this working directory.
setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
9 | ||
## 1. Read auxiliary data files ####

# First column of the identifier file (presumably one identifier per row).
identifiants <- read.table("identifs.txt")[[1]]

# First column of the dates file, restricted to the entries containing "2010".
# The unfiltered vector is only a temporary and is dropped right away.
all_dates <- read.table("datesall.txt")[[1]]
dates <- all_dates[grepl("2010", all_dates)]
rm(all_dates)

# n: number of identifiers (not used in the visible part of this script).
# p: number of 2010 entries; used below as the number of value columns per line.
n <- length(identifiants)
p <- length(dates)

# Number of lines to read from the large data file at each pass of the
# reading loop: three chunks of 6500 lines followed by one of 5511.
blocks <- c(6500, 6500, 6500, 5511)
21 | ||
22 | ||
## 2. Process the large file ####
##
## The data file is read in chunks of `blocks[b]` lines so that only one chunk
## of raw text is held in memory at a time.  Each line is split on single
## spaces into `p + 1` fields: the first field supplies the row name and the
## remaining `p` fields are converted to numeric.  Every row is then passed
## through toDWT() and contrib(), and the per-chunk results are stacked into
## `matcontrib`, which is finally written to disk.
## NOTE(review): toDWT() and contrib() are not defined in this script;
## presumably they are provided by the sourced functional-clustering.r.

# Open the file directly in read mode.  There is no earlier connection to
# close on a fresh run: calling close(con) before `con` exists is an error.
con <- file("~/tmp/2010_full.txt", open = "r")

# One result matrix per chunk, bound together once after the loop instead of
# growing `matcontrib` with rbind() on every iteration.
contrib_blocks <- vector("list", length(blocks))

# `finally` guarantees the connection is closed even if a chunk fails.
tryCatch({
  for (b in seq_along(blocks)) {                      # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    if (length(actual) == 0) {
      stop("no lines left in the data file for block ", b, call. = FALSE)
    }

    fields <- unlist(strsplit(actual, " "))
    # matrix() would only warn and recycle on a ragged chunk, silently
    # misaligning every row; fail loudly instead.
    if (length(fields) != length(actual) * (p + 1)) {
      stop("block ", b, ": expected ", p + 1, " fields per line, found ",
           length(fields), " fields in ", length(actual), " lines",
           call. = FALSE)
    }
    rm(actual)

    auxmat <- matrix(fields, ncol = p + 1, byrow = TRUE)
    rm(fields)

    # Convert all value columns in one vectorised call.  drop = FALSE keeps
    # the matrix shape when the chunk holds a single line.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat))
    # Characters 2 to 7 of the first field are used as the row name.
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    auxDWT <- t(apply(datamat, 1, toDWT))
    contrib_blocks[[b]] <- t(apply(auxDWT, 1, contrib))
    rm(auxDWT)
  }
}, finally = close(con))                              # close connection to the file

matcontrib <- do.call(rbind, contrib_blocks)
rm(contrib_blocks)

write.table(matcontrib, file = "~/tmp/2010_contrib.txt")
54 |