[epclust.git] / old_C_code / stage2_UNFINISHED / src / 01_extract-features_2010.r

## File : 01_extract-features_2010.r
## Description : Using the full data matrix, we extract handy features to
##               cluster.

rm(list = ls())  # start from an empty workspace

## Helper functions are loaded from a remote script; setwd() makes the
## relative file names below resolve inside the project directory.
source("http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r")
setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

## Only the first column of each auxiliary file is used.
identifiants <- read.table("identifs.txt")[[1]]
dates0       <- read.table("datesall.txt")[[1]]
dates        <- dates0[grepl("2010", dates0)]  # keep entries containing "2010"
rm(dates0)

n <- length(identifiants)  # number of identifiers
p <- length(dates)         # number of retained dates

## Number of lines to read from the large file on each pass.
blocks <- c(6500, 6500, 6500, 5511)


## 2. Process the large file ####

## Stream the large file in chunks of blocks[b] lines. Each line is split on
## single spaces into p + 1 fields: the first field carries the row label,
## the remaining p fields are numeric values. Every row is passed through
## toDWT() and then contrib() (neither is defined in this file; presumably
## they come from the script sourced at the top -- confirm), and the per-block
## results are stacked and written to disk.
##
## NOTE: the stray close(con) that used to precede file() was removed. `con`
## does not exist at that point (the workspace is cleared at the top of the
## script), so the call aborted any non-interactive run.

con <- file("~/tmp/2010_full.txt", open = "r")  # Open a read connection

## One slot per block; bound together once after the loop instead of growing
## the result with rbind() on every pass.
contribs <- vector("list", length(blocks))

tryCatch({
  for (b in seq_along(blocks)) {        # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    if (length(actual) == 0) {
      break                             # file exhausted before all blocks
    }

    fields <- strsplit(actual, " ", fixed = TRUE)
    rm(actual)

    ## A row with the wrong number of fields would be silently misaligned
    ## by matrix(..., byrow = TRUE); fail loudly instead.
    nfields <- vapply(fields, length, integer(1))
    if (any(nfields != p + 1)) {
      stop("block ", b, ": expected ", p + 1, " fields per line, found ",
           paste(unique(nfields), collapse = ", "), call. = FALSE)
    }

    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
    rm(fields, nfields)

    ## Convert the value columns in one shot; specifying nrow keeps the
    ## matrix shape even when the block holds a single row.
    datamat <- matrix(as.numeric(auxmat[, -1]), nrow = nrow(auxmat))
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)  # label = chars 2..7
    rm(auxmat)

    auxDWT <- t(apply(datamat, 1, toDWT))
    contribs[[b]] <- t(apply(auxDWT, 1, contrib))
    rm(auxDWT)
  }
}, finally = close(con))                # always release the connection

matcontrib <- do.call(rbind, contribs)  # empty slots (NULL) are ignored
rm(contribs)

write.table(matcontrib, file = "~/tmp/2010_contrib.txt")
Commit	Line	Data
	1	## File : 01_extract-features_2010.r
	2	## Description : Using the full data matrix, we extract handy features to
	3	## cluster.
	4
	5	rm(list = ls())
	6
	7	source('http://eric.univ-lyon2.fr/~jcugliari/codes/functional-clustering.r')
	8	setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
	9
	10	## 1. Read auxiliar data files ####
	11
	12	identifiants <- read.table("identifs.txt")[ ,1]
	13	dates0 <- read.table("datesall.txt")[, 1]
	14	dates <- dates0[grep("2010", dates0)]
	15	rm(dates0)
	16
	17	n <- length(identifiants)
	18	p <- length(dates)
	19
	20	blocks <- c(rep(6500, 3), 5511)
	21
	22
	23	## 2. Process the large file ####
	24
	25	close(con)
	26	con <- file("~/tmp/2010_full.txt") # Establish a connection to the file
	27	open(con, "r") # Open the connection
	28
	29	for(b in seq_along(blocks)){ # Reading loop
	30	nb <- blocks[b]
	31	actual <- readLines(con = con, n = nb )
	32	auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
	33	rm(actual)
	34
	35	datamat <- t(apply(auxmat[, -1], 1, as.numeric))
	36	rownames(datamat) <- substr(auxmat[, 1], 2, 7)
	37	rm(auxmat)
	38
	39	auxDWT <- t(apply(datamat, 1, toDWT))
	40	auxcontrib <- t(apply(auxDWT, 1, contrib))
	41	rm(auxDWT)
	42
	43	if(b == 1) {
	44	matcontrib <- auxcontrib
	45	} else {
	46	matcontrib <- rbind(matcontrib, auxcontrib)
	47	}
	48
	49	}
	50
	51	close(con) # close connection to the file
	52
	53	write.table(matcontrib, file = "~/tmp/2010_contrib.txt")
	54