complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 01_extract-features2_2009.r
1 ## File : 01_extract-features_2009.r
2 ## Description : Using the full data matrix, we extract handy features to
3 ## cluster.
4
5 rm(list = ls())
6
7 #source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
8 setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
9 source('aux2.r')
10
11 ## 1. Read auxiliar data files ####
12
13 identifiants <- read.table("identifs.txt")[ ,1]
14 dates0 <- read.table("datesall.txt")[, 1]
15 dates <- dates0[grep("2009", dates0)]
16 rm(dates0)
17
18 n <- length(identifiants)
19 p <- length(dates)
20
21 blocks <- c(rep(6500, 3), 5511)
22
23 # table( substr(dates, 11, 15) ) # Sunlight time saving produces an
24 # unbalanced number of time points
25 # per time stepa across the year
26
27
28 ## 2. Process the large file ####
29
30 close(con)
31 con <- file("~/tmp/2009_full.txt") # Establish a connection to the file
32 open(con, "r") # Open the connection
33
34 for(b in seq_along(blocks)){ # Reading loop
35 nb <- blocks[b]
36 actual <- readLines(con = con, n = nb )
37 auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
38 rm(actual)
39
40 datamat <- t(apply(auxmat[, -1], 1, as.numeric))
41 rownames(datamat) <- substr(auxmat[, 1], 2, 7)
42 rm(auxmat)
43
44 nas <- which(is.na(datamat)[, 1]) # some 1/1/2009 are missing
45 if(length(nas)>0) datamat[nas, 1] <- rowMeans(datamat[nas, 2:4])
46
47 imput <- datamat[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
48 datamat <- cbind(datamat[, 1:4180], imput, datamat[, 4181:17518])
49
50 dayimp <- t(apply(datamat, 1, dayimpact))
51 nightimp <- t(apply(datamat, 1, nightimpact))
52 lunchimp <- t(apply(datamat, 1, lunchimpact))
53
54 auxfeat <- cbind(dayimp, nightimp, lunchimp)
55
56 if(b == 1) {
57 matfeat <- auxfeat
58 } else {
59 matfeat <- rbind(matfeat, auxfeat)
60 }
61
62 }
63
64 close(con) # close connection to the file
65
66 write.table(matfeat, file = "~/tmp/2009_impacts.txt")
67