old_C_code/stage2_UNFINISHED/src/unused/03_compute-sums-of-classes_2010.r

   ## File : 03_compute-sums-of-classes_2010.r
   ## Description : Compute the synchrones (per-cluster sums of the data rows)
   ##               for each group obtained by the clustering.
   4
   # Wipe the workspace, then move to the code directory: the auxiliary files
   # below (and the files read/written at the end of the script) are addressed
   # through relative paths.
   rm(list = ls())
   setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")

   ## 1. Read auxiliary data files ####

   # Only the first column of each auxiliary file is used.
   identifiants <- read.table("identifs.txt")[, 1]
   dates0       <- read.table("datesall.txt")[, 1]

   # Keep the entries of datesall.txt whose text contains "2010".
   # NOTE(review): this is a substring match, not a match on the year field
   # only -- confirm against the date format used in datesall.txt.
   dates <- dates0[grepl("2010", dates0)]
   rm(dates0)

   n <- length(identifiants)   # number of identifiers
   p <- length(dates)          # number of retained 2010 entries

   # Number of lines read from the large file at each pass of the reading loop.
   blocks <- rep(c(6500, 5511), times = c(3, 1))

   # Fit of the clustering : clfit
   # Alternative fit, kept for reference:
   #load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
   # NOTE(review): if both .Rdata files below define `clfit`, the object from
   # the second load() is the one used from here on -- confirm this is intended.
   load("~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata")
   load("~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata")
   #  table(clfit$clustering)

   # Number of clusters = number of rows of the cluster-info table.
   K <- dim(clfit$clusinfo)[1L]

   # One row per clustered item, holding its cluster label.
   dfclust <- data.frame(cluster = clfit$clustering)
   # read write.table(dfclust, file = "../res/clfit200RC-random.txt") for random
   #dfclust <- head(dfclust, 50)           # just for testing purposes

   # K x p accumulator for the per-cluster sums, filled by the reading loop.
   synchros <- matrix(0, nrow = K, ncol = p)
   rm(clfit)
  34
  ## 2. Process the large file ####

  # Stream "~/tmp/2010_full.txt" in chunks of blocks[b] lines and add, for
  # every cluster k, the column sums of the rows assigned to k to synchros[k, ].
  #
  # Uses objects created earlier in the script: `blocks`, `p`, `K`, `dfclust`
  # (row names are matched against the ids of the file, column `cluster` holds
  # the label) and the zero-initialised K x p matrix `synchros`.
  # Each line of the file is expected to hold p + 1 space-separated fields:
  # an id field followed by p values.

  # No close(con) before opening: the workspace was cleared at the top of the
  # script, so `con` does not exist yet and closing it would stop the script.
  con <- file("~/tmp/2010_full.txt", open = "r")  # open a read connection

  # tryCatch(..., finally = ) closes the connection even when a block fails,
  # so no stale connection is left behind for the next run.
  tryCatch({
    for (b in seq_along(blocks)) {        # Reading loop
      actual <- readLines(con = con, n = blocks[b])
      if (length(actual) == 0L) {
        break                             # file shorter than sum(blocks)
      }

      fields <- unlist(strsplit(actual, " "))
      # matrix() would silently recycle or misalign values on a malformed line.
      if (length(fields) != length(actual) * (p + 1)) {
        stop("block ", b, ": expected ", p + 1,
             " space-separated fields on every line", call. = FALSE)
      }
      auxmat <- matrix(fields, ncol = p + 1, byrow = TRUE)
      rm(actual, fields)

      # Numeric part of the block; drop = FALSE keeps a matrix for 1-row blocks.
      datamat <- auxmat[, -1, drop = FALSE]
      storage.mode(datamat) <- "double"
      # Characters 2 to 7 of the first field are used as the row id.
      rownames(datamat) <- substr(auxmat[, 1], 2, 7)
      rm(auxmat)

      # obtain for each line of datamat the cluster membership (if any);
      # rows whose id is not in dfclust get NA and are ignored below.
      clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]

      for (k in seq_len(K)) {
        clustk <- which(clustfactor == k)
        if (length(clustk) > 0) {
          # drop = FALSE lets colSums() handle a single-member cluster as well.
          synchros[k, ] <- synchros[k, ] +
            colSums(datamat[clustk, , drop = FALSE])
        }
      }
    }
  }, finally = close(con))                # close connection to the file

  # One column per cluster plus `total`, the sum over all clusters.
  synchros <- data.frame(t(synchros), total = colSums(synchros))
  73
  74 # write.table(synchros, file = "~/tmp/2010_synchros200RC.txt")
  75 # write.table(synchros, file = "~/tmp/2010_synchros200-random.txt")
  76 #
  77 #
  78
  79
  # Per-cluster sums for several alternative clusterings stored column-wise in
  # "clfit200muchos.txt"; one output file "<column name>2010.txt" per column.
  #
  # The large file is streamed again here: after the reading loop of section 2
  # `datamat` only holds the last block, so summing over it would ignore most
  # of the rows. All clusterings are accumulated during a single pass.
  #
  # Uses `blocks`, `p` and `K` defined earlier in the script.
  dfclust <- read.table("clfit200muchos.txt")

  # At most the first 10 columns (clusterings) are processed.
  clusterings <- seq_len(min(10L, ncol(dfclust)))

  # synchros_all[k, , j]: running sum for cluster k of clustering j.
  synchros_all <- array(0, dim = c(K, p, length(clusterings)))
  n_matched <- 0L

  con <- file("~/tmp/2010_full.txt", open = "r")
  tryCatch({
    for (b in seq_along(blocks)) {
      actual <- readLines(con = con, n = blocks[b])
      if (length(actual) == 0L) {
        break                             # file shorter than sum(blocks)
      }

      fields <- unlist(strsplit(actual, " "))
      # matrix() would silently recycle or misalign values on a malformed line.
      if (length(fields) != length(actual) * (p + 1)) {
        stop("block ", b, ": expected ", p + 1,
             " space-separated fields on every line", call. = FALSE)
      }
      auxmat <- matrix(fields, ncol = p + 1, byrow = TRUE)
      rm(actual, fields)

      datamat <- auxmat[, -1, drop = FALSE]
      storage.mode(datamat) <- "double"
      rownames(datamat) <- substr(auxmat[, 1], 2, 7)
      rm(auxmat)

      # NOTE(review): ids are matched against rownames(dfclust), as in
      # section 2; this assumes clfit200muchos.txt was written with the ids as
      # row names (the former reference object `tdata` is not defined anywhere
      # in this script) -- confirm.
      rowpos <- match(rownames(datamat), rownames(dfclust))
      n_matched <- n_matched + sum(!is.na(rowpos))

      for (j in clusterings) {
        clustfactor <- dfclust[rowpos, j]
        for (k in seq_len(K)) {
          clustk <- which(clustfactor == k)
          if (length(clustk) > 0) {
            synchros_all[k, , j] <- synchros_all[k, , j] +
              colSums(datamat[clustk, , drop = FALSE])
          }
        }
      }
    }
  }, finally = close(con))

  # Refuse to write all-zero results when the ids could not be matched at all.
  if (n_matched == 0L) {
    stop("no id of ~/tmp/2010_full.txt matches the row names of ",
         "clfit200muchos.txt", call. = FALSE)
  }

  for (j in clusterings) {
    # matrix() restores the K x p shape even when K or p equals 1.
    synchros <- matrix(synchros_all[, , j], nrow = K, ncol = p)
    synchros <- data.frame(t(synchros), total = colSums(synchros))
    write.table(synchros, file = paste0(colnames(dfclust)[j], "2010.txt"))
  }
 101