complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / 01_extract-features_2009.r
CommitLineData
ad642dc6
BA
## File : 01_extract-features_2009.r
## Description : Using the full data matrix, we extract handy features to be
##               used for clustering.
4
# Start from an empty global workspace.
rm(list = ls())

# Load the helper code published at the URL below, then switch to the
# directory that the relative file names used further down are resolved
# against.
source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

# First column of "identifs.txt".
identifiants <- read.table("identifs.txt")[, 1]

# First column of "datesall.txt", restricted to the entries whose text
# matches "2009". The unfiltered vector only lives inside local().
dates <- local({
  dates0 <- read.table("datesall.txt")[, 1]
  dates0[grep("2009", dates0)]
})

p <- length(dates)         # number of retained date entries
n <- length(identifiants)  # number of identifiers

# Number of lines fetched by each readLines() call of the reading loop:
# three chunks of 6500 lines followed by one of 5511 (25011 in total).
# NOTE(review): the total presumably equals `n` -- confirm.
blocks <- rep(c(6500, 5511), times = c(3, 1))
21
# table( substr(dates, 11, 15) ) # Daylight saving time produces an
                                 # unbalanced number of time points
                                 # per time step across the year
25
26
## 2. Process the large file ####
##
## Reads "~/tmp/2009_full.txt" in chunks of `blocks[b]` lines. Every line is
## split on single spaces into p + 1 fields: the first field carries the row
## label (characters 2 to 7 are kept), the remaining p fields are converted
## to numbers. toDWT() and then contrib() are applied row by row, the
## per-chunk results are stacked into `matcontrib`, and that matrix is
## written to "~/tmp/2009_contrib.txt".
##
## Uses `p` and `blocks` defined earlier in this script, and toDWT() and
## contrib(), which are not defined in this file (presumably they come from
## the sourced functional-clustering.r -- confirm).

# The original script called close(con) at this point, before `con` had been
# created; in a fresh workspace that call fails with "object 'con' not
# found", so it has been dropped.
con <- file("~/tmp/2009_full.txt", open = "r")  # open the file for reading

# One slot per chunk; the slots are bound together once after the loop
# instead of growing `matcontrib` with rbind() at every iteration.
contrib_blocks <- vector("list", length(blocks))

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)

    # The file ended earlier than `blocks` announces: keep what was read.
    if (length(actual) == 0L) {
      warning("no more lines to read at block ", b, "; stopping early",
              call. = FALSE)
      break
    }

    fields <- strsplit(actual, " ")
    rm(actual)

    # matrix() would silently recycle or misalign the values if some line
    # did not hold exactly p + 1 fields, so fail loudly instead.
    if (any(lengths(fields) != p + 1)) {
      stop("block ", b, ": every line must contain ", p + 1,
           " space-separated fields", call. = FALSE)
    }

    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
    rm(fields)

    # drop = FALSE keeps the matrix shape when the chunk holds a single row.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat))
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    auxDWT <- t(apply(datamat, 1, toDWT))
    auxcontrib <- t(apply(auxDWT, 1, contrib))
    rm(auxDWT)

    contrib_blocks[[b]] <- auxcontrib
  }
}, finally = close(con))  # close the connection even if a step above fails

# rbind() ignores the NULL slots left behind by an early stop.
matcontrib <- do.call(rbind, contrib_blocks)
rm(contrib_blocks)

if (is.null(matcontrib)) {
  stop("no data could be read from ~/tmp/2009_full.txt", call. = FALSE)
}

write.table(matcontrib, file = "~/tmp/2009_contrib.txt")
58