Commit | Line | Data |
---|---|---|
ad642dc6 BA |
1 | ## File : ireland-data.r |
2 | ## Description : | |
3 | ||
4 | rm(list = ls()) | |
5 | setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/") | |
6 | library(Rwave) # CWT | |
7 | library(cluster) # pam | |
8 | ||
9 | ## 1. Read auxiliar data files #### | |
10 | source("aux.r") # auxiliary clustering functions | |
11 | source("sowas-superseded.r") # auxiliary CWT functions | |
12 | load("~/data/Irlande/Data_CER_clean/SME.RData") | |
13 | SME <- as.matrix(SME) | |
14 | ||
15 | nbdays <- nrow(SME) / 48 | |
16 | nb_clust <- nbdays - 365 # last year to forecast | |
17 | ||
18 | id_clust <- 1:(48 * nb_clust) | |
19 | ||
20 | ## 2. Compute WER distance matrix #### | |
21 | conso <- t(SME[id_clust, ]) # ts are in lines | |
22 | N <- delta <- ncol(conso) # length of one ts | |
23 | n <- nrow(conso) # number of ts | |
24 | ||
25 | # # _.a CWT -- Filtering the lowest freqs (>6m) #### | |
26 | # nvoice <- 4 | |
27 | # # noctave4 = 2^12 = 4096 half hours ~ 90 days | |
28 | # noctave4 <- adjust.noctave(N = N, | |
29 | # dt = 1, s0 = 2, | |
30 | # tw = 0, noctave = 12) | |
31 | # # 4 here represent 2^5 = 32 half-hours ~ 1 day | |
32 | # scalevector4 <- 2^(4:(noctave4 * nvoice) / nvoice) * 2 | |
33 | # lscvect4 <- length(scalevector4) | |
34 | # lscvect <- lscvect4 # i should clean my code: werFam demands a lscvect | |
35 | # Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1, | |
36 | # scalevector = scalevector4, | |
37 | # lt = N, smooth = FALSE, | |
38 | # nvoice = nvoice) # observations node with CWT | |
39 | # | |
40 | # | |
41 | # Xcwt2 <- matrix(NA_complex_, nrow = n, ncol= 2 + length((c(Xcwt4[,,1])))) | |
42 | # | |
43 | # for(i in 1:n) | |
44 | # Xcwt2[i,] <- c(N, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) ) | |
45 | # | |
46 | # rm(conso, Xcwt4); gc() | |
47 | # | |
48 | # # _.b WER^2 distances ######## | |
49 | # Xwer_dist <- matrix(0.0, n, n) | |
50 | # for(i in 1:(n - 1)){ | |
51 | # mat1 <- vect2mat(Xcwt2[i,]) | |
52 | # for(j in (i + 1):n){ | |
53 | # mat2 <- vect2mat(Xcwt2[j,]) | |
54 | # num <- Mod(mat1 * Conj(mat2)) | |
55 | # WX <- Mod(mat1 * Conj(mat1)) | |
56 | # WY <- Mod(mat2 * Conj(mat2)) | |
57 | # smsmnum <- smCWT(num, scalevector = scalevector4) | |
58 | # smsmWX <- smCWT(WX, scalevector = scalevector4) | |
59 | # smsmWY <- smCWT(WY, scalevector = scalevector4) | |
60 | # wer2 <- sum(colSums(smsmnum)^2) / | |
61 | # sum( sum(colSums(smsmWX) * colSums(smsmWY)) ) | |
62 | # Xwer_dist[i, j] <- sqrt(N * lscvect * (1 - wer2)) | |
63 | # Xwer_dist[j, i] <- Xwer_dist[i, j] | |
64 | # } | |
65 | # } | |
66 | # diag(Xwer_dist) <- numeric(n) | |
67 | # | |
68 | # save(Xwer_dist, file = "~/werdist-irlanda.Rdata") | |
69 | ||
70 | load("~/werdist-irlanda.Rdata") | |
71 | hc <- hclust(as.dist(Xwer_dist)) | |
72 | ||
73 | plot(hc) | |
74 | ||
75 | ||
76 | Ks <- c(2:10, 15, 20, 25, 30) | |
77 | for(k in seq_along(Ks)){ | |
78 | K <- Ks[k] | |
79 | pamfit <- pam(as.dist(Xwer_dist), k = K, diss = TRUE) | |
80 | ||
81 | fname <- paste0("clustfactor", K) | |
82 | write.table(pamfit$clustering, | |
83 | file = paste0("~/tmp/clustfactorSME-", K, ".txt")) | |
84 | } | |
85 | ||
86 | ||
87 | #dir(path = '~/tmp/', pattern = 'clustfac- | |
88 | K <- 2 | |
89 | fname <- paste0("~/tmp/clustfactorSME-", K, ".txt") | |
90 | clustfactor <- read.table(fname)$x # pamfit$clustering | |
91 | for(k in 1:K){ | |
92 | clustk <- which(clustfactor == k) | |
93 | if(length(clustk) > 0) { | |
94 | if(length(clustk) > 1) { | |
95 | SCk <- colSums(SME[, which(clustfactor == k)]) | |
96 | } else { | |
97 | SCk <- synchros09[which(clustfactor == k), ] | |
98 | } | |
99 | SC[k, ] <- SC[k, ] + SCk | |
100 | rm(SCk) | |
101 | } | |
102 | } | |
103 | ||
104 |