old_C_code/stage2_UNFINISHED/src/unused/01_extract-features2_2009.r

   1 ## File : 01_extract-features_2009.r
   2 ## Description : Using the full data matrix, we extract handy features to
   3 ##               cluster.
   4
   5 rm(list = ls())
   6
   7 #source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
   8 setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
   9 source('aux2.r')
  10
  11 ## 1. Read auxiliar data files ####
  12
  13 identifiants <- read.table("identifs.txt")[ ,1]
  14 dates0       <- read.table("datesall.txt")[, 1]
  15 dates        <- dates0[grep("2009", dates0)]
  16 rm(dates0)
  17
  18 n <- length(identifiants)
  19 p <- length(dates)
  20
  21 blocks <- c(rep(6500, 3), 5511)
  22
  23 # table( substr(dates, 11, 15) ) # Sunlight time saving produces an
  24                                  # unbalanced number of time points
  25                                  # per time stepa across the year
  26
  27
  28 ## 2. Process the large file ####
  29
  30 close(con)
  31 con <- file("~/tmp/2009_full.txt")  # Establish a connection to the file
  32 open(con, "r")                      # Open the connection
  33
  34 for(b in seq_along(blocks)){        # Reading loop
  35   nb <- blocks[b]
  36   actual <- readLines(con = con, n = nb )
  37   auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
  38   rm(actual)
  39
  40   datamat <- t(apply(auxmat[, -1], 1, as.numeric))
  41   rownames(datamat) <- substr(auxmat[, 1], 2, 7)
  42   rm(auxmat)
  43
  44   nas <- which(is.na(datamat)[, 1]) # some 1/1/2009 are missing
  45   if(length(nas)>0) datamat[nas, 1] <- rowMeans(datamat[nas, 2:4])
  46
  47   imput   <- datamat[, 4180:4181] %*% matrix(c(2/3, 1/3, 1/3, 2/3), 2)
  48   datamat <- cbind(datamat[, 1:4180], imput, datamat[, 4181:17518])
  49
  50   dayimp   <- t(apply(datamat, 1, dayimpact))
  51   nightimp <- t(apply(datamat, 1, nightimpact))
  52   lunchimp <- t(apply(datamat, 1, lunchimpact))
  53
  54   auxfeat  <- cbind(dayimp, nightimp, lunchimp)
  55
  56   if(b == 1) {
  57     matfeat <- auxfeat
  58   } else {
  59     matfeat <- rbind(matfeat, auxfeat)
  60   }
  61
  62 }
  63
  64 close(con)                      # close connection to the file
  65
  66 write.table(matfeat, file = "~/tmp/2009_impacts.txt")
  67