| 1 | ## File : 01_extract-features_2009.r |
| 2 | ## Description : Using the full data matrix, we extract handy features to |
| 3 | ## cluster. |
| 4 | |
## Start from an empty workspace so that objects left over from a previous
## session cannot leak into this run.
rm(list = ls())

#source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
# NOTE(review): presumably provides dayimpact(), nightimpact() and
# lunchimpact(), which are used below but not defined in this file -- confirm.
source("aux2.r")

## 1. Read auxiliary data files ####

# First column of each auxiliary file: the identifiers and the time stamps.
identifiants <- read.table("identifs.txt")[[1]]

# Keep only the time stamps that contain "2009"; the full vector of dates is
# confined to the local scope so it does not stay around in the workspace.
dates <- local({
  all_dates <- read.table("datesall.txt")[[1]]
  all_dates[grepl("2009", all_dates)]
})

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of time points retained for 2009

# Number of lines read from the large file at each pass of the reading loop:
# three chunks of 6500 lines followed by one of 5511.
blocks <- rep(c(6500, 5511), times = c(3, 1))

# table(substr(dates, 11, 15))  # Daylight saving time produces an
#                               # unbalanced number of time points
#                               # per time step across the year
| 26 | |
| 27 | |
## 2. Process the large file ####
##
## Reads "~/tmp/2009_full.txt" in chunks of `blocks[b]` lines, converts each
## chunk to a numeric matrix (one row per line, `p` value columns), fills in
## missing first values, inserts two interpolated columns, computes the
## day / night / lunch impact features for every row and finally writes the
## stacked features to "~/tmp/2009_impacts.txt".

# No close(con) before opening: `con` does not exist yet in a fresh session
# (the workspace is cleared at the top of the script), so closing it here
# would abort the run with "object 'con' not found".

gap_col  <- 4180   # the two new columns are inserted right after this one
last_col <- 17518  # last column of the raw series that is kept

# Interpolation weights: the first new column is 2/3 * left + 1/3 * right,
# the second one is 1/3 * left + 2/3 * right.
# NOTE(review): presumably this fills the spring daylight-saving gap -- confirm.
interp_w <- matrix(c(2 / 3, 1 / 3, 1 / 3, 2 / 3), 2)

con <- file("~/tmp/2009_full.txt", open = "r")  # open the file for reading

# One slot per chunk; bound together once after the loop instead of growing
# the result with rbind() at every iteration.
featlist <- vector("list", length(blocks))

tryCatch({
  for (b in seq_along(blocks)) {  # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)

    # Each line is split on single spaces into p + 1 fields: a label
    # followed by the p values.
    auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
    rm(actual)

    # Vectorised conversion to numeric; drop = FALSE keeps the matrix shape
    # even when the chunk holds a single line.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat))
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # Some 1/1/2009 values (first column) are missing: replace them by the
    # mean of the next three time points of the same row. drop = FALSE is
    # required so that rowMeans() still receives a matrix when only one row
    # is affected.
    nas <- which(is.na(datamat[, 1]))
    if (length(nas) > 0) {
      datamat[nas, 1] <- rowMeans(datamat[nas, 2:4, drop = FALSE])
    }

    # Insert two interpolated columns between columns gap_col and gap_col + 1.
    imput <- datamat[, gap_col + 0:1, drop = FALSE] %*% interp_w
    datamat <- cbind(datamat[, 1:gap_col, drop = FALSE],
                     imput,
                     datamat[, (gap_col + 1):last_col, drop = FALSE])

    # Row-wise features computed by helpers defined outside this file.
    dayimp   <- t(apply(datamat, 1, dayimpact))
    nightimp <- t(apply(datamat, 1, nightimpact))
    lunchimp <- t(apply(datamat, 1, lunchimpact))

    featlist[[b]] <- cbind(dayimp, nightimp, lunchimp)
  }
}, finally = close(con))  # always release the connection, even on error

matfeat <- do.call(rbind, featlist)

write.table(matfeat, file = "~/tmp/2009_impacts.txt")
| 67 | |