old_C_code/stage2_UNFINISHED/src/unused/03_compute-sums-of-classes-par_2009.r

   1 ## File : 03_compute-sum-of-classes_2009.r
   2 ## Description : Calculer les synchrones pour chaque groupe obtenu par le
   3 ##               clustering.
   4
   5 rm(list = ls())
   6
   7 MOJARRITA <- Sys.info()[4] ==  "mojarrita"
   8
   9 if(MOJARRITA){
  10   setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
  11 } else {
  12   setwd("~/2014_EDF-Orsay-Lyon2/codes/")
  13 }
  14
  15
  16 ## 1. Read auxiliar data files ####
  17
  18 identifiants <- read.table("identifs.txt")[ ,1]
  19 dates0       <- read.table("datesall.txt")[, 1]
  20 dates        <- dates0[grep("2009", dates0)]
  21 rm(dates0)
  22
  23 n <- length(identifiants)
  24 p <- length(dates)
  25
  26 if(MOJARRITA) {
  27   blocks <- c(rep(6500, 3), 5511)
  28   } else {
  29   blocks <- 25011
  30   }
  31
  32 # Fit of the clustering : clfit
  33 load('../res/clfitdf200.Rdata') # Loads res that containts
  34                                   # clusterings memberships
  35 res <- as.data.frame(res)
  36
  37 lres <- length(res)
  38 K       <- 200 #nrow(clfit$clusinfo)
  39
  40
  41 ## 2. Process the large file ####
  42
  43 close(con)
  44 con <- file("~/tmp/2009_full.txt")  # Establish a connection to the file
  45 open(con, "r")                      # Open the connection
  46
  47 for(b in seq_along(blocks)){        # Reading loop
  48   nb <- blocks[b]
  49   actual <- readLines(con = con, n = nb )
  50   auxmat <- matrix(unlist(strsplit(actual, " ")), ncol = p + 1, byrow = TRUE)
  51   rm(actual)
  52
  53   datamat <- t(apply(auxmat[, -1], 1, as.numeric)) # the NA introduced by as.numeric
  54                                                    # are NA strings
  55   rownames(datamat) <- substr(auxmat[, 1], 2, 7)
  56   rm(auxmat)
  57
  58   synchros <- lapply(res,
  59                      function(ll) {
  60                         aux <- matrix(0, ncol = p, nrow = K)
  61                         for(k in 1:K) {
  62                           clustk <- which(ll == k)
  63                           if(length(clustk) > 1) {
  64                             aux[k, ] <- colSums(datamat[ll ==  k, ])
  65                           } else {
  66                             aux[k, ] <- datamat[ll ==  k, ]
  67                           }
  68                         }
  69                         aux
  70                       })
  71 }
  72
  73 close(con)                # close connection to the file
  74
  75 # save(synchros, file = "~/tmp/2009synchrosdf200WER")
  76