major folder reorganisation, R pkg is now epclust/ at first level. Experimental usage...
[epclust.git] / old_C_code / stage2 / src / unused / 03_compute-sums-of-classesRANDOM_2010.r
diff --git a/old_C_code/stage2/src/unused/03_compute-sums-of-classesRANDOM_2010.r b/old_C_code/stage2/src/unused/03_compute-sums-of-classesRANDOM_2010.r
new file mode 100644 (file)
index 0000000..0c75bd0
--- /dev/null
@@ -0,0 +1,82 @@
+## File : 03_compute-sums-of-classesRANDOM_2010.r
+## Description : Compute the synchrones (sums of the series) for each group
+##               obtained by the clustering.
+
+# NOTE(review): clearing the workspace and changing the working directory are
+# kept as-is because the relative paths used by this script ("identifs.txt",
+# "datesall.txt", "../res/...") are resolved against this directory.
+rm(list = ls())
+
+setwd("~/Documents/projects/2014_EDF-Orsay-Lyon2/codes/")
+
+## 1. Read auxiliary data files ####
+
+identifiants <- read.table("identifs.txt")[, 1]
+dates0       <- read.table("datesall.txt")[, 1]
+dates        <- dates0[grep("2009", dates0)]  # keep entries matching "2009"
+rm(dates0)
+
+n <- length(identifiants)  # number of identifiers
+p <- length(dates)         # number of retained dates (columns of `synchros`)
+
+# Number of lines read from the large data file on each pass of the reading
+# loop in section 2.
+blocks <- c(rep(6500, 3), 5511)
+
+# Fit of the clustering: loads the object `clfit`
+#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit500.Rdata')
+load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200.Rdata')
+#load('~/Documents/projects/2014_EDF-Orsay-Lyon2/res/clfit200RC.Rdata')
+#  table(clfit$clustering)
+
+# Row names of `dfclust` come from the names of clfit$clustering; they are
+# matched against the identifiers of the data file in section 2.
+dfclust <- data.frame(cluster = clfit$clustering) # real clustering
+K       <- nrow(clfit$clusinfo)                   # number of clusters
+
+#dfclust2 <- dfclust
+# Random clustering: permute the real labels across the rows. This keeps every
+# cluster size unchanged and does not depend on a hard-coded number of
+# clusters (the previous 1:200 was only correct for the 200-cluster fit).
+dfclust$cluster <- dfclust$cluster[sample.int(nrow(dfclust))]
+
+
+
+#dfclust <- head(dfclust, 50)           # just for testing purposes
+# Accumulator: one row per cluster, one column per date.
+synchros <- matrix(0, ncol = p, nrow = K)
+rm(clfit)
+                   
+## 2. Process the large file ####
+
+# The data file is read in chunks of blocks[b] lines. Each line is split on
+# single spaces into p + 1 fields: the first field carries the identifier
+# (characters 2 to 7), the remaining p fields are the numeric values. The rows
+# of every chunk are added to the accumulator row of their cluster.
+
+con <- file("~/tmp/2010_full.txt")  # Establish a connection to the file
+open(con, "r")                      # Open the connection
+
+# tryCatch(..., finally = ) guarantees that the connection is closed even if
+# reading or parsing fails part-way through the file.
+tryCatch({
+  for (b in seq_along(blocks)) {      # Reading loop
+    actual <- readLines(con = con, n = blocks[b])
+    if (length(actual) == 0L) {
+      warning("input file ended before block ", b, call. = FALSE)
+      break
+    }
+
+    fields <- strsplit(actual, " ")
+    rm(actual)
+    # Fail loudly on malformed lines instead of letting matrix() recycle
+    # values into the wrong cells.
+    stopifnot(all(lengths(fields) == p + 1))
+    auxmat <- matrix(unlist(fields), ncol = p + 1, byrow = TRUE)
+    rm(fields)
+
+    # Convert the value columns in one step; giving nrow explicitly keeps a
+    # matrix even when the block holds a single line.
+    datamat <- matrix(as.numeric(auxmat[, -1]), nrow = nrow(auxmat))
+    rownames(datamat) <- substr(auxmat[, 1], 2, 7)
+    rm(auxmat)
+
+    # obtain for each line of datamat the cluster membership (NA if none)
+    clustfactor <- dfclust$cluster[match(rownames(datamat), rownames(dfclust))]
+
+    #  if(length(which(rownames(datamat) == "218095"))>0) print(b);
+
+    for (k in seq_len(K)) {
+      clustk <- which(clustfactor == k)  # which() drops the NA memberships
+      if (length(clustk) > 0) {
+        # drop = FALSE keeps a matrix for single-row clusters, so colSums()
+        # covers both the one-row and the many-row case.
+        synchros[k, ] <- synchros[k, ] +
+          colSums(datamat[clustk, , drop = FALSE])
+      }
+    }
+  }
+}, finally = close(con))  # close connection to the file
+
+# One column per cluster plus a `total` column summing all clusters.
+synchros <- data.frame(t(synchros), total = colSums(synchros))
+
+write.table(synchros, file = "~/tmp/2009_synchros200RANDOM.txt")
+write.table(dfclust,  file = "../res/dfclust200RANDOM.txt")
+#write.table(synchros, file = "~/tmp/2009_synchros200RC.txt")
+