## File : 03_compute-sum-of-classes_2009.r
## (epclust.git: old_C_code/stage2_UNFINISHED/src/unused/
##  03_compute-sums-of-classesRANDOM-par_2009.r)
## Description : Compute the synchrones (per-cluster sums of the individual
##               series) for each group obtained by the clustering, using
##               randomly permuted cluster memberships.

rm(list = ls())

# Paths and block sizes below depend on which machine the script runs on.
MOJARRITA <- Sys.info()[["nodename"]] == "mojarrita"

if (MOJARRITA) {
  setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
} else {
  setwd("~/2014_EDF-Orsay-Lyon2/codes/")
}

# Sum the rows of `datamat` cluster by cluster.
#
# datamat     : numeric matrix, one row per individual of the current block.
# memberships : cluster label (1..K) of each row of `datamat`, same order.
# K           : number of clusters.
# Returns a K x ncol(datamat) matrix whose k-th row is the sum of the rows of
# `datamat` labelled k; clusters with no member in `datamat` get a zero row.
sum_by_cluster <- function(datamat, memberships, K) {
  sums <- matrix(0, nrow = K, ncol = ncol(datamat))
  for (k in seq_len(K)) {
    members <- which(memberships == k)
    if (length(members) > 0) {
      # drop = FALSE keeps a one-row matrix so colSums also works for
      # singleton clusters.
      sums[k, ] <- colSums(datamat[members, , drop = FALSE])
    }
  }
  sums
}


## 1. Read auxiliary data files ####

identifiants <- read.table("identifs.txt")[, 1]
dates0 <- read.table("datesall.txt")[, 1]
dates <- dates0[grep("2009", dates0)]
rm(dates0)

n <- length(identifiants)
p <- length(dates)

# Number of lines of the large file read at each pass (both settings add up
# to 25011 lines).
if (MOJARRITA) {
  blocks <- c(rep(6500, 3), 5511)
} else {
  blocks <- 25011
}

# Fit of the clustering : clfit
load("../res/clfitdf200.Rdata") # Loads res, which contains the
                                # clustering memberships
res <- as.data.frame(res)

# Each column of res is randomly permuted; the result is then replaced by the
# copy stored on disk (assuming the .Rdata file holds an object named resRDN,
# as the commented-out save() suggests -- TODO confirm).
resRDN <- as.data.frame(lapply(res, sample))
# save(file = "../res/clfitdf200RDN.Rdata", resRDN)
load("../res/clfitdf200RDN.Rdata")

lres <- length(res)
K <- 200 # nrow(clfit$clusinfo)
rm(res)


## 2. Process the large file ####

# One K x p accumulator per clustering; each block read below adds its
# contribution, so the result does not depend on how the file is split.
synchros <- lapply(resRDN, function(ll) matrix(0, nrow = K, ncol = p))
offset <- 0L # number of rows of the file already processed

con <- file("~/tmp/2009_full.txt", open = "r") # Open a read connection

tryCatch({
  for (b in seq_along(blocks)) { # Reading loop
    nb <- blocks[b]
    actual <- readLines(con = con, n = nb)
    if (length(actual) == 0) {
      break # file exhausted earlier than `blocks` announced
    }
    actual_split <- strsplit(actual, " ")
    rm(actual)

    # First field of each line is the identifier, the rest are the values.
    # The NA produced by as.numeric come from "NA" strings in the file.
    auxlist <- lapply(actual_split, function(x) as.numeric(x[-1]))
    # Guard against silent recycling in matrix() on malformed lines.
    stopifnot(all(vapply(auxlist, length, integer(1)) == p))
    datamat <- matrix(unlist(auxlist), ncol = p, byrow = TRUE)
    rm(auxlist)

    auxnames <- vapply(actual_split, `[`, character(1), 1)
    rownames(datamat) <- substr(auxnames, 2, 7)
    rm(actual_split, auxnames)

    # Rows of the membership table matching the current block. This assumes
    # the file lists individuals in the same order as the membership vectors
    # (the positional indexing of the original code relied on it too).
    rows <- offset + seq_len(nrow(datamat))

    synchros <- Map(
      function(acc, ll) acc + sum_by_cluster(datamat, ll[rows], K),
      synchros, resRDN
    )

    offset <- offset + nrow(datamat)
    rm(datamat)
  }
}, finally = close(con)) # close connection to the file, even on error

if (offset != nrow(resRDN)) {
  warning("Read ", offset, " rows but memberships cover ", nrow(resRDN),
          " individuals", call. = FALSE)
}

# save(synchros, file = "~/tmp/2009synchrosdf200RND")