complete first draft of package
[epclust.git] / old_C_code / stage2_UNFINISHED / src / unused / analysis-SME.r
1 ## File : ireland-data.r
2 ## Description :
3
4 rm(list = ls())
5 setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")
6 library(Rwave) # CWT
7 library(cluster) # pam
8
9 ## 1. Read auxiliar data files ####
10 source("aux.r") # auxiliary clustering functions
11 source("sowas-superseded.r") # auxiliary CWT functions
12 load("~/data/Irlande/Data_CER_clean/SME.RData")
13 SME <- as.matrix(SME)
14
15 nbdays <- nrow(SME) / 48
16 nb_clust <- nbdays - 365 # last year to forecast
17
18 id_clust <- 1:(48 * nb_clust)
19
20 ## 2. Compute WER distance matrix ####
21 conso <- t(SME[id_clust, ]) # ts are in lines
22 N <- delta <- ncol(conso) # length of one ts
23 n <- nrow(conso) # number of ts
24
25 # # _.a CWT -- Filtering the lowest freqs (>6m) ####
26 # nvoice <- 4
27 # # noctave4 = 2^12 = 4096 half hours ~ 90 days
28 # noctave4 <- adjust.noctave(N = N,
29 # dt = 1, s0 = 2,
30 # tw = 0, noctave = 12)
31 # # 4 here represent 2^5 = 32 half-hours ~ 1 day
32 # scalevector4 <- 2^(4:(noctave4 * nvoice) / nvoice) * 2
33 # lscvect4 <- length(scalevector4)
34 # lscvect <- lscvect4 # i should clean my code: werFam demands a lscvect
35 # Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1,
36 # scalevector = scalevector4,
37 # lt = N, smooth = FALSE,
38 # nvoice = nvoice) # observations node with CWT
39 #
40 #
41 # Xcwt2 <- matrix(NA_complex_, nrow = n, ncol= 2 + length((c(Xcwt4[,,1]))))
42 #
43 # for(i in 1:n)
44 # Xcwt2[i,] <- c(N, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) )
45 #
46 # rm(conso, Xcwt4); gc()
47 #
48 # # _.b WER^2 distances ########
49 # Xwer_dist <- matrix(0.0, n, n)
50 # for(i in 1:(n - 1)){
51 # mat1 <- vect2mat(Xcwt2[i,])
52 # for(j in (i + 1):n){
53 # mat2 <- vect2mat(Xcwt2[j,])
54 # num <- Mod(mat1 * Conj(mat2))
55 # WX <- Mod(mat1 * Conj(mat1))
56 # WY <- Mod(mat2 * Conj(mat2))
57 # smsmnum <- smCWT(num, scalevector = scalevector4)
58 # smsmWX <- smCWT(WX, scalevector = scalevector4)
59 # smsmWY <- smCWT(WY, scalevector = scalevector4)
60 # wer2 <- sum(colSums(smsmnum)^2) /
61 # sum( sum(colSums(smsmWX) * colSums(smsmWY)) )
62 # Xwer_dist[i, j] <- sqrt(N * lscvect * (1 - wer2))
63 # Xwer_dist[j, i] <- Xwer_dist[i, j]
64 # }
65 # }
66 # diag(Xwer_dist) <- numeric(n)
67 #
68 # save(Xwer_dist, file = "~/werdist-irlanda.Rdata")
69
70 load("~/werdist-irlanda.Rdata")
71 hc <- hclust(as.dist(Xwer_dist))
72
73 plot(hc)
74
75
76 Ks <- c(2:10, 15, 20, 25, 30)
77 for(k in seq_along(Ks)){
78 K <- Ks[k]
79 pamfit <- pam(as.dist(Xwer_dist), k = K, diss = TRUE)
80
81 fname <- paste0("clustfactor", K)
82 write.table(pamfit$clustering,
83 file = paste0("~/tmp/clustfactorSME-", K, ".txt"))
84 }
85
86
87 #dir(path = '~/tmp/', pattern = 'clustfac-
88 K <- 2
89 fname <- paste0("~/tmp/clustfactorSME-", K, ".txt")
90 clustfactor <- read.table(fname)$x # pamfit$clustering
91 for(k in 1:K){
92 clustk <- which(clustfactor == k)
93 if(length(clustk) > 0) {
94 if(length(clustk) > 1) {
95 SCk <- colSums(SME[, which(clustfactor == k)])
96 } else {
97 SCk <- synchros09[which(clustfactor == k), ]
98 }
99 SC[k, ] <- SC[k, ] + SCk
100 rm(SCk)
101 }
102 }
103
104