[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / 03_compute-sums-of-classes-par_2009.r

## File : 03_compute-sum-of-classes_2009.r
## Description : Compute the "synchrones" (per-cluster sums of the series)
##               for each group obtained by the clustering.

# Start from an empty workspace.
rm(list = ls())

# TRUE when the script runs on the host named "mojarrita"; this selects both
# the working directory and the chunk sizes used to read the large data file.
MOJARRITA <- Sys.info()[4] == "mojarrita"

setwd(if (MOJARRITA) {
  "~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/"
} else {
  "~/2014_EDF-Orsay-Lyon2/codes/"
})


## 1. Read auxiliary data files ####

# First column of each file; only the dates containing "2009" are kept.
identifiants <- read.table("identifs.txt")[, 1]
dates <- local({
  all_dates <- read.table("datesall.txt")[, 1]
  all_dates[grepl("2009", all_dates)]
})

n <- length(identifiants)
p <- length(dates)

# Number of lines to read from the large file at each pass of the reading
# loop: four chunks on "mojarrita", a single chunk elsewhere.
blocks <- if (MOJARRITA) c(rep(6500, 3), 5511) else 25011

# Fit of the clustering: loading this file is expected to define `res`,
# which contains the clustering memberships.
load("../res/clfitdf200.Rdata")
res <- as.data.frame(res)

lres <- ncol(res)  # number of clusterings (columns of res)
K    <- 200        # nrow(clfit$clusinfo)


## 2. Process the large file ####
#
# Reads "~/tmp/2009_full.txt" in chunks of `blocks[b]` lines and accumulates,
# for every clustering (column of `res`), the per-cluster column sums of the
# data. The result `synchros` is a list with one K x p matrix per column of
# `res`; row k holds the sum of the series whose membership equals k.
#
# Fixes relative to the previous version:
#   * no `close(con)` before `con` exists (it stopped the script right after
#     `rm(list = ls())`);
#   * sums are accumulated across blocks instead of being overwritten by the
#     last block;
#   * memberships are restricted to the rows of the current block, so the
#     logical index has the same length as the block;
#   * empty clusters yield a row of zeros instead of a zero-length
#     replacement error;
#   * the connection is closed even if a block fails to parse.
#
# NOTE(review): rows of the file are assumed to be in the same order as the
# rows of `res` (the original positional `ll == k` indexing relied on this
# too) -- confirm against the code that produced `res`.

# One zero-initialised accumulator per clustering.
synchros <- lapply(res, function(ll) matrix(0, nrow = K, ncol = p))

con <- file("~/tmp/2009_full.txt", open = "r")  # Open a read connection
offset <- 0L                                    # Rows consumed so far

tryCatch({
  for (b in seq_along(blocks)) {                # Reading loop
    actual <- readLines(con = con, n = blocks[b])
    if (length(actual) == 0L) {
      break                                     # File exhausted early
    }

    fields <- strsplit(actual, " ")
    rm(actual)
    if (any(lengths(fields) != p + 1)) {
      stop("block ", b, ": every line must have ", p + 1,
           " space-separated fields", call. = FALSE)
    }
    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
    rm(fields)

    # Non-numeric fields (e.g. "NA" strings) become NA, with a warning.
    datamat <- matrix(as.numeric(auxmat[, -1, drop = FALSE]),
                      nrow = nrow(auxmat), ncol = p)
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat)

    # Positions, in the full membership vectors, of this block's rows.
    rows   <- offset + seq_len(nrow(datamat))
    offset <- offset + nrow(datamat)

    synchros <- Map(function(acc, ll) {
      # Cluster index of each row of the block; NA for rows with a missing
      # membership or one outside 1..K (those rows are ignored, as with the
      # original `ll == k` test).
      cl <- match(ll[rows], seq_len(K))
      ok <- !is.na(cl)
      if (any(ok)) {
        part <- rowsum(datamat[ok, , drop = FALSE], group = cl[ok])
        kk   <- as.integer(rownames(part))
        acc[kk, ] <- acc[kk, , drop = FALSE] + part
      }
      acc
    }, synchros, res)

    rm(datamat)
  }
}, finally = close(con))  # Always release the connection

if (offset != nrow(res)) {
  warning("read ", offset, " rows from the data file but `res` has ",
          nrow(res), " memberships", call. = FALSE)
}

# save(synchros, file = "~/tmp/2009synchrosdf200WER")
Commit	Line	Data
ad642dc6 BA	1	## File : 03_compute-sum-of-classes_2009.r
	2	## Description : Calculer les synchrones pour chaque groupe obtenu par le
	3	## clustering.
	4
	5	rm(list = ls())
	6
	7	MOJARRITA <- Sys.info()[4] == "mojarrita"
	8
	9	if(MOJARRITA){
	10	setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
	11	} else {
	12	setwd("~/2014_EDF-Orsay-Lyon2/codes/")
	13	}
	14
	15
	16	## 1. Read auxiliar data files ####
	17
	18	identifiants <- read.table("identifs.txt")[ ,1]
	19	dates0 <- read.table("datesall.txt")[, 1]
	20	dates <- dates0[grep("2009", dates0)]
	21	rm(dates0)
	22
	23	n <- length(identifiants)
	24	p <- length(dates)
	25
	26	if(MOJARRITA) {
	27	blocks <- c(rep(6500, 3), 5511)
	28	} else {
	29	blocks <- 25011
	30	}
	31
	32	# Fit of the clustering : clfit
	33	load('../res/clfitdf200.Rdata') # Loads res that containts
	34	# clusterings memberships
	35	res <- as.data.frame(res)
	36
	37	lres <- length(res)
	38	K <- 200 #nrow(clfit$clusinfo)
	39
	40
	41	## 2. Process the large file ####
	42
	43	close(con)
	44	con <- file("~/tmp/2009_full.txt") # Establish a connection to the file
	45	open(con, "r") # Open the connection
	46
	47	for(b in seq_along(blocks)){ # Reading loop
	48	nb <- blocks[b]
	49	actual <- readLines(con = con, n = nb )
	50	auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
	51	rm(actual)
	52
	53	datamat <- t(apply(auxmat[, -1], 1, as.numeric)) # the NA introduced by as.numeric
	54	# are NA strings
	55	rownames(datamat) <- substr(auxmat[, 1], 2, 7)
	56	rm(auxmat)
	57
	58	synchros <- lapply(res,
	59	function(ll) {
	60	aux <- matrix(0, ncol = p, nrow = K)
	61	for(k in 1:K) {
	62	clustk <- which(ll == k)
	63	if(length(clustk) > 1) {
	64	aux[k, ] <- colSums(datamat[ll == k, ])
65	} else {
66	aux[k, ] <- datamat[ll == k, ]
67	}
68	}
69	aux
70	})
71	}
72
73	close(con) # close connection to the file
74
75	# save(synchros, file = "~/tmp/2009synchrosdf200WER")
76