Commit | Line | Data |
---|---|---|
ad642dc6 BA |
## File : 01_extract-features_2009.r
## Description : Using the full data matrix, we extract handy features to
##               cluster.

# NOTE(review): wiping the workspace and changing the working directory are
# kept on purpose, because the relative paths used below ('aux2.r',
# "identifs.txt", "datesall.txt") are resolved against the setwd() target.
rm(list = ls())

#source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
# Presumably defines dayimpact(), nightimpact() and lunchimpact(), which are
# used below but not defined in this script -- confirm against aux2.r.
source('aux2.r')

## 1. Read auxiliary data files ####

identifiants <- read.table("identifs.txt")[, 1]
dates0 <- read.table("datesall.txt")[, 1]
dates <- dates0[grep("2009", dates0)]  # keep only the 2009 time stamps
rm(dates0)

n <- length(identifiants)
p <- length(dates)  # number of value fields expected on each line

# Number of lines read from the large file on each pass of the reading loop.
blocks <- c(rep(6500, 3), 5511)

# table( substr(dates, 11, 15) )  # Daylight saving time produces an
#                                 # unbalanced number of time points
#                                 # per time step across the year


## 2. Process the large file ####

# Two interpolated columns are inserted right after column `gap_col`
# (presumably the spring daylight-saving gap -- TODO confirm); the raw block
# is used up to column `last_col`.
gap_col <- 4180
last_col <- 17518
# Column 1 of the weights gives 2/3 * left + 1/3 * right, column 2 gives
# 1/3 * left + 2/3 * right: a linear interpolation between the two columns
# that surround the gap.
interp_weights <- matrix(c(2 / 3, 1 / 3, 1 / 3, 2 / 3), 2)

# Open the connection in read mode. A stray `close(con)` used to precede this
# line; it failed on a fresh session because `con` did not exist yet.
con <- file("~/tmp/2009_full.txt", open = "r")

# One feature matrix per block, bound together once after the loop instead of
# growing `matfeat` with rbind() on every pass.
feature_blocks <- vector("list", length(blocks))

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    # Each line is split on single spaces into p + 1 fields: an identifier
    # field followed by p values.
    auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
    rm(actual)

    # Convert the value fields to numeric while keeping the matrix shape
    # (drop = FALSE guards against a block that holds a single line).
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat))
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # Some 1/1/2009 values are missing: replace a missing first value by the
    # mean of the next three. drop = FALSE keeps a matrix even when only one
    # row is affected, which rowMeans() requires.
    nas <- which(is.na(datamat[, 1]))
    if (length(nas) > 0) {
      datamat[nas, 1] <- rowMeans(datamat[nas, 2:4, drop = FALSE])
    }

    imput <- datamat[, gap_col:(gap_col + 1)] %*% interp_weights
    datamat <- cbind(datamat[, 1:gap_col], imput,
                     datamat[, (gap_col + 1):last_col])

    # Row-wise features computed by the helpers sourced from aux2.r.
    dayimp <- t(apply(datamat, 1, dayimpact))
    nightimp <- t(apply(datamat, 1, nightimpact))
    lunchimp <- t(apply(datamat, 1, lunchimpact))

    feature_blocks[[b]] <- cbind(dayimp, nightimp, lunchimp)
  }
}, finally = close(con))  # close the connection even if a block fails

matfeat <- do.call(rbind, feature_blocks)

write.table(matfeat, file = "~/tmp/2009_impacts.txt")