## File : 01_extract-features_2010.r
## Description : Using the full data matrix, we extract handy features to
##               cluster.
##
## The large input file is read in chunks of lines. Each chunk is parsed into
## a numeric matrix (one row per line), every row is passed through `toDWT()`
## and then `contrib()`, and the per-chunk results are stacked row-wise and
## written to "~/tmp/2010_contrib.txt".

# NOTE(review): clearing the workspace and changing the working directory are
# session-wide side effects; kept because the relative paths below rely on
# the working directory.
rm(list = ls())

# `toDWT()` and `contrib()` are not defined in this file; presumably they are
# provided by the script sourced here -- TODO confirm.
source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')

setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")


## 1. Read auxiliary data files ####

identifiants <- read.table("identifs.txt")[, 1]
dates0       <- read.table("datesall.txt")[, 1]
dates        <- dates0[grep("2010", dates0)]  # keep entries containing "2010"
rm(dates0)

n <- length(identifiants)
p <- length(dates)  # number of value fields expected on each line

# Number of lines read at each pass over the connection.
# NOTE(review): presumably sum(blocks) matches the number of lines in the
# input file (and `n`) -- verify.
blocks <- c(rep(6500, 3), 5511)


## 2. Process the large file ####

# The connection is created here; there is nothing to close beforehand
# (the workspace was cleared above, so no earlier `con` can exist).
con <- file("~/tmp/2010_full.txt", open = "r")

# One slot per chunk; the chunks are bound together once after the loop
# instead of growing a matrix with rbind() at every iteration.
contrib_parts <- vector("list", length(blocks))

tryCatch(
  {
    for (b in seq_along(blocks)) {  # Reading loop
      nb     <- blocks[b]
      actual <- readLines(con = con, n = nb)

      # Stop cleanly if the file is exhausted before all blocks are read.
      if (length(actual) == 0L) {
        warning("Input file ended before block ", b, " could be read.",
                call. = FALSE)
        break
      }

      # Every line must split into an identifier plus `p` values; otherwise
      # matrix() would recycle or mis-align the fields.
      fields <- unlist(strsplit(actual, " "))
      if (length(fields) != length(actual) * (p + 1)) {
        stop("Block ", b, ": expected ", p + 1, " space-separated fields ",
             "per line.", call. = FALSE)
      }
      auxmat <- matrix(fields, ncol = p + 1, byrow = TRUE)
      rm(actual, fields)

      # Columns 2..(p + 1) hold the values; convert the whole matrix at once.
      datamat <- auxmat[, -1, drop = FALSE]
      storage.mode(datamat) <- "double"
      # Row names are characters 2 to 7 of the first field of each line.
      rownames(datamat) <- substr(auxmat[, 1], 2, 7)
      rm(auxmat)

      auxDWT     <- t(apply(datamat, 1, toDWT))
      auxcontrib <- t(apply(auxDWT, 1, contrib))
      rm(auxDWT)

      contrib_parts[[b]] <- auxcontrib
    }
  },
  # Always release the file connection, even if a chunk fails.
  finally = close(con)
)

# rbind() ignores NULL entries left by blocks that were never read.
matcontrib <- do.call(rbind, contrib_parts)
rm(contrib_parts)

if (is.null(matcontrib)) {
  stop("No data could be read from the input file.", call. = FALSE)
}

write.table(matcontrib, file = "~/tmp/2010_contrib.txt")