| 1 | ## File : ireland-data.r |
| 2 | ## Description : |
| 3 | |
| 4 | rm(list = ls()) |
| 5 | setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/") |
| 6 | library(Rwave) # CWT |
| 7 | library(cluster) # pam |
| 8 | |
| 9 | ## 1. Read auxiliar data files #### |
| 10 | source("aux.r") # auxiliary clustering functions |
| 11 | source("sowas-superseded.r") # auxiliary CWT functions |
| 12 | load("~/data/Irlande/Data_CER_clean/SME.RData") |
| 13 | SME <- as.matrix(SME) |
| 14 | |
| 15 | nbdays <- nrow(SME) / 48 |
| 16 | nb_clust <- nbdays - 365 # last year to forecast |
| 17 | |
| 18 | id_clust <- 1:(48 * nb_clust) |
| 19 | |
| 20 | ## 2. Compute WER distance matrix #### |
| 21 | conso <- t(SME[id_clust, ]) # ts are in lines |
| 22 | N <- delta <- ncol(conso) # length of one ts |
| 23 | n <- nrow(conso) # number of ts |
| 24 | |
| 25 | # # _.a CWT -- Filtering the lowest freqs (>6m) #### |
| 26 | # nvoice <- 4 |
| 27 | # # noctave4 = 2^12 = 4096 half hours ~ 90 days |
| 28 | # noctave4 <- adjust.noctave(N = N, |
| 29 | # dt = 1, s0 = 2, |
| 30 | # tw = 0, noctave = 12) |
| 31 | # # 4 here represent 2^5 = 32 half-hours ~ 1 day |
| 32 | # scalevector4 <- 2^(4:(noctave4 * nvoice) / nvoice) * 2 |
| 33 | # lscvect4 <- length(scalevector4) |
| 34 | # lscvect <- lscvect4 # i should clean my code: werFam demands a lscvect |
| 35 | # Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1, |
| 36 | # scalevector = scalevector4, |
| 37 | # lt = N, smooth = FALSE, |
| 38 | # nvoice = nvoice) # observations node with CWT |
| 39 | # |
| 40 | # |
| 41 | # Xcwt2 <- matrix(NA_complex_, nrow = n, ncol= 2 + length((c(Xcwt4[,,1])))) |
| 42 | # |
| 43 | # for(i in 1:n) |
| 44 | # Xcwt2[i,] <- c(N, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) ) |
| 45 | # |
| 46 | # rm(conso, Xcwt4); gc() |
| 47 | # |
| 48 | # # _.b WER^2 distances ######## |
| 49 | # Xwer_dist <- matrix(0.0, n, n) |
| 50 | # for(i in 1:(n - 1)){ |
| 51 | # mat1 <- vect2mat(Xcwt2[i,]) |
| 52 | # for(j in (i + 1):n){ |
| 53 | # mat2 <- vect2mat(Xcwt2[j,]) |
| 54 | # num <- Mod(mat1 * Conj(mat2)) |
| 55 | # WX <- Mod(mat1 * Conj(mat1)) |
| 56 | # WY <- Mod(mat2 * Conj(mat2)) |
| 57 | # smsmnum <- smCWT(num, scalevector = scalevector4) |
| 58 | # smsmWX <- smCWT(WX, scalevector = scalevector4) |
| 59 | # smsmWY <- smCWT(WY, scalevector = scalevector4) |
| 60 | # wer2 <- sum(colSums(smsmnum)^2) / |
| 61 | # sum( sum(colSums(smsmWX) * colSums(smsmWY)) ) |
| 62 | # Xwer_dist[i, j] <- sqrt(N * lscvect * (1 - wer2)) |
| 63 | # Xwer_dist[j, i] <- Xwer_dist[i, j] |
| 64 | # } |
| 65 | # } |
| 66 | # diag(Xwer_dist) <- numeric(n) |
| 67 | # |
| 68 | # save(Xwer_dist, file = "~/werdist-irlanda.Rdata") |
| 69 | |
| 70 | load("~/werdist-irlanda.Rdata") |
| 71 | hc <- hclust(as.dist(Xwer_dist)) |
| 72 | |
| 73 | plot(hc) |
| 74 | |
| 75 | |
| 76 | Ks <- c(2:10, 15, 20, 25, 30) |
| 77 | for(k in seq_along(Ks)){ |
| 78 | K <- Ks[k] |
| 79 | pamfit <- pam(as.dist(Xwer_dist), k = K, diss = TRUE) |
| 80 | |
| 81 | fname <- paste0("clustfactor", K) |
| 82 | write.table(pamfit$clustering, |
| 83 | file = paste0("~/tmp/clustfactorSME-", K, ".txt")) |
| 84 | } |
| 85 | |
| 86 | |
| 87 | #dir(path = '~/tmp/', pattern = 'clustfac- |
| 88 | K <- 2 |
| 89 | fname <- paste0("~/tmp/clustfactorSME-", K, ".txt") |
| 90 | clustfactor <- read.table(fname)$x # pamfit$clustering |
| 91 | for(k in 1:K){ |
| 92 | clustk <- which(clustfactor == k) |
| 93 | if(length(clustk) > 0) { |
| 94 | if(length(clustk) > 1) { |
| 95 | SCk <- colSums(SME[, which(clustfactor == k)]) |
| 96 | } else { |
| 97 | SCk <- synchros09[which(clustfactor == k), ] |
| 98 | } |
| 99 | SC[k, ] <- SC[k, ] + SCk |
| 100 | rm(SCk) |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | |