--- /dev/null
+## File : 01_extract-features_2009.r
+## Description : Using the full data matrix, we extract handy features to
+## cluster.
+
+rm(list = ls())
+
+#source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
+setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
+source('aux2.r')
+
+## 1. Read auxiliary data files ####
+
+# First column of each auxiliary table: identifiers and time stamps.
+identifiants <- read.table("identifs.txt")[[1]]
+all_dates <- read.table("datesall.txt")[[1]]
+
+# Keep only the time stamps whose text contains "2009".
+dates <- all_dates[grepl("2009", all_dates)]
+rm(all_dates)
+
+n <- length(identifiants)  # number of identifiers
+p <- length(dates)         # number of retained time stamps
+
+# Number of lines of the large file read at each pass of the reading loop.
+blocks <- c(6500, 6500, 6500, 5511)
+
+# table( substr(dates, 11, 15) ) # Daylight saving time produces an
+ # unbalanced number of time points
+ # per time step across the year
+
+
+## 2. Process the large file ####
+
+# Read the large file block by block, turn each block into a numeric matrix,
+# impute the known gaps and compute the impact features of every row.
+# The features of all blocks are stacked into `matfeat` and written to disk.
+#
+# Note: no connection is closed before opening; a stray close(con) on a
+# not-yet-defined `con` aborts the script right after rm(list = ls()).
+con <- file("~/tmp/2009_full.txt", open = "r")  # read-only text connection
+
+# One slot per block; binding once at the end avoids re-copying the
+# accumulated matrix on every iteration.
+featblocks <- vector("list", length(blocks))
+
+tryCatch({
+  for (b in seq_along(blocks)) {                 # Reading loop
+    nb <- blocks[b]
+    actual <- readLines(con = con, n = nb)
+
+    # Each line is split on single spaces into p + 1 fields:
+    # a label followed by the p measurements.
+    auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
+    rm(actual)
+
+    # Numeric conversion of everything but the label column, keeping the
+    # matrix shape even when the block holds a single row.
+    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
+                      nrow = nrow(auxmat))
+    rownames(datamat) <- substr(auxmat[, 1], 2, 7)  # characters 2-7 of label
+    rm(auxmat)
+
+    # Some 1/1/2009 values (first column) are missing: replace them by the
+    # mean of the next three columns. drop = FALSE keeps a matrix when only
+    # one row is affected, which rowMeans() requires.
+    nas <- which(is.na(datamat[, 1]))
+    if (length(nas) > 0) {
+      datamat[nas, 1] <- rowMeans(datamat[nas, 2:4, drop = FALSE])
+    }
+
+    # Insert two columns between columns 4180 and 4181, linearly interpolated
+    # with weights (2/3, 1/3) and (1/3, 2/3).
+    # NOTE(review): presumably fills the daylight-saving gap and assumes
+    # p == 17518 -- confirm against datesall.txt.
+    imput <- datamat[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
+    datamat <- cbind(datamat[, 1:4180], imput, datamat[, 4181:17518])
+
+    # Row-wise features; dayimpact, nightimpact and lunchimpact come from
+    # aux2.r (assumed to return a vector per row -- TODO confirm).
+    dayimp   <- t(apply(datamat, 1, dayimpact))
+    nightimp <- t(apply(datamat, 1, nightimpact))
+    lunchimp <- t(apply(datamat, 1, lunchimpact))
+
+    featblocks[[b]] <- cbind(dayimp, nightimp, lunchimp)
+  }
+}, finally = close(con))  # always release the connection, even on error
+
+matfeat <- do.call(rbind, featblocks)
+rm(featblocks)
+
+write.table(matfeat, file = "~/tmp/2009_impacts.txt")
+