## File : 03_compute-sums-of-classesRANDOM_2010.r
## Description : Compute the "synchrones" (per-cluster sums of the curves)
##               for each group obtained by the clustering, after the
##               cluster labels have been randomly permuted.
## Provenance : recovered from a git diff in which this file
##               (code/stage2/src/unused/) was deleted.

# The relative paths used below ("identifs.txt", "datesall.txt", "../res/...")
# are resolved against this directory.
setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")

## 1. Read auxiliary data files ####

identifiants <- read.table("identifs.txt")[, 1]
dates0 <- read.table("datesall.txt")[, 1]
# NOTE(review): dates are filtered on "2009" while the data file read below is
# "2010_full.txt" and the script name says 2010 -- confirm which year is
# intended. `p` (the expected number of values per row) depends on this.
dates <- dates0[grep("2009", dates0)]
rm(dates0)

n <- length(identifiants) # not used below
p <- length(dates)        # number of time points per curve

# Number of lines of the large file read at each pass.
blocks <- c(rep(6500, 3), 5511)

# Fit of the clustering : clfit
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
# table(clfit$clustering)

dfclust <- data.frame(cluster = clfit$clustering) # real clustering
K <- nrow(clfit$clusinfo)

# Random clustering: shuffle the observed labels among the individuals.
# A permutation keeps every cluster size unchanged and, unlike a hard-coded
# `1:200`, works for any number of clusters.
dfclust$cluster <- sample(dfclust$cluster)

#dfclust <- head(dfclust, 50) # just for testing purposes

# One row per cluster, one column per time point; filled block by block.
synchros <- matrix(0, ncol = p, nrow = K)
rm(clfit)

## 2. Process the large file ####

con <- file("~/tmp/2010_full.txt", open = "r") # open a read connection

tryCatch({
  for (b in seq_along(blocks)) { # reading loop
    actual <- readLines(con = con, n = blocks[b])
    if (length(actual) == 0) {
      break # file shorter than announced by `blocks`: nothing left to read
    }

    fields <- strsplit(actual, " ")
    rm(actual)
    # Each row must hold one identifier followed by p values; otherwise the
    # matrix built below would be silently misaligned.
    if (any(lengths(fields) != p + 1)) {
      stop("Block ", b, ": found rows that do not have ", p + 1, " fields",
           call. = FALSE)
    }
    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
    rm(fields)

    # Numeric conversion that keeps the matrix shape even for a single row.
    values <- auxmat[, -1, drop = FALSE]
    datamat <- matrix(as.numeric(values), nrow = nrow(values))
    # The identifier is taken from characters 2 to 7 of the first field.
    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
    rm(auxmat, values)

    # Cluster membership of each line of datamat (NA when the identifier is
    # not among the row names of dfclust).
    clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]

    for (k in seq_len(K)) {
      rows_k <- which(clustfactor == k) # which() discards the NA memberships
      if (length(rows_k) > 0) {
        # drop = FALSE keeps a one-row selection as a matrix for colSums().
        synchros[k, ] <- synchros[k, ] +
          colSums(datamat[rows_k, , drop = FALSE])
      }
    }
    rm(datamat, clustfactor)
  }
}, finally = close(con)) # always release the connection, even on error

## 3. Write the results ####

# One row per time point, one column per cluster, plus the overall total.
synchros <- data.frame(t(synchros), total = colSums(synchros))

write.table(synchros, file = "~/tmp/2009_synchros200RANDOM.txt")
write.table(dfclust, file = "../res/dfclust200RANDOM.txt")
#write.table(synchros, file = "~/tmp/2009_synchros200RC.txt")