## File : ireland-data.r
## Description : Partition the series stored in SME.RData with PAM on a
##               precomputed WER distance matrix, write one partition per
##               number of clusters, then aggregate the series of each
##               cluster for one chosen partition.

# NOTE(review): the working directory is changed because the two source()
# calls below use paths relative to the code directory.
setwd("~/ownCloud/projects/2014_EDF-Orsay-Lyon2/codes/")

library(Rwave)    # CWT
library(cluster)  # pam

## 1. Read auxiliar data files ####

source("aux.r")               # auxiliary clustering functions
source("sowas-superseded.r")  # auxiliary CWT functions

load("~/data/Irlande/Data_CER_clean/SME.RData")
SME <- as.matrix(SME)

# 48 records per day; the last 365 days are held out.
nbdays   <- nrow(SME) / 48
nb_clust <- nbdays - 365  # last year to forecast
id_clust <- seq_len(48 * nb_clust)

## 2. Compute WER distance matrix ####

conso <- t(SME[id_clust, ])  # ts are in lines
N <- delta <- ncol(conso)    # length of one ts
n <- nrow(conso)             # number of ts

# The block below is kept commented out: it is the computation whose result
# was saved to "~/werdist-irlanda.Rdata", which is loaded right after it.
#
# # _.a CWT -- Filtering the lowest freqs (>6m) ####
# nvoice <- 4
# # noctave4 = 2^12 = 4096 half hours ~ 90 days
# noctave4 <- adjust.noctave(N = N,
#                            dt = 1, s0 = 2,
#                            tw = 0, noctave = 12)
# # 4 here represent 2^5 = 32 half-hours ~ 1 day
# scalevector4 <- 2^(4:(noctave4 * nvoice) / nvoice) * 2
# lscvect4 <- length(scalevector4)
# lscvect <- lscvect4 # i should clean my code: werFam demands a lscvect
# Xcwt4 <- toCWT(conso, noctave = noctave4, dt = 1,
#                scalevector = scalevector4,
#                lt = N, smooth = FALSE,
#                nvoice = nvoice) # observations node with CWT
#
# Xcwt2 <- matrix(NA_complex_, nrow = n, ncol= 2 + length((c(Xcwt4[,,1]))))
#
# for(i in 1:n)
#   Xcwt2[i,] <- c(N, lscvect, Xcwt4[,,i] / max(Mod(Xcwt4[,,i])) )
#
# rm(conso, Xcwt4); gc()
#
# # _.b WER^2 distances ########
# Xwer_dist <- matrix(0.0, n, n)
# for(i in 1:(n - 1)){
#   mat1 <- vect2mat(Xcwt2[i,])
#   for(j in (i + 1):n){
#     mat2 <- vect2mat(Xcwt2[j,])
#     num <- Mod(mat1 * Conj(mat2))
#     WX <- Mod(mat1 * Conj(mat1))
#     WY <- Mod(mat2 * Conj(mat2))
#     smsmnum <- smCWT(num, scalevector = scalevector4)
#     smsmWX <- smCWT(WX, scalevector = scalevector4)
#     smsmWY <- smCWT(WY, scalevector = scalevector4)
#     wer2 <- sum(colSums(smsmnum)^2) /
#       sum( sum(colSums(smsmWX) * colSums(smsmWY)) )
#     Xwer_dist[i, j] <- sqrt(N * lscvect * (1 - wer2))
#     Xwer_dist[j, i] <- Xwer_dist[i, j]
#   }
# }
# diag(Xwer_dist) <- numeric(n)
#
# save(Xwer_dist, file = "~/werdist-irlanda.Rdata")

load("~/werdist-irlanda.Rdata")  # provides Xwer_dist

## 3. Clustering on the WER distances ####

# Convert once; the same dissimilarity object feeds hclust and every pam fit.
wer_dissim <- as.dist(Xwer_dist)

hc <- hclust(wer_dissim)
plot(hc)

# Make sure the output directory exists before writing the partitions.
dir.create("~/tmp", showWarnings = FALSE, recursive = TRUE)

# One PAM partition per candidate number of clusters, each written to its
# own text file.
Ks <- c(2:10, 15, 20, 25, 30)
for (K in Ks) {
  pamfit <- pam(wer_dissim, k = K, diss = TRUE)
  write.table(pamfit$clustering,
              file = paste0("~/tmp/clustfactorSME-", K, ".txt"))
}

# dir(path = "~/tmp/", pattern = "clustfac")

## 4. Aggregate the series of each cluster ####

K <- 2
fname <- paste0("~/tmp/clustfactorSME-", K, ".txt")
clustfactor <- read.table(fname)$x  # pamfit$clustering

# The cluster labels index the columns of SME (one series per column, see
# `conso <- t(SME[id_clust, ])` above), so both must have the same length.
stopifnot(length(clustfactor) == ncol(SME))

# SC[k, ] is the sum, at each time step, of the series assigned to cluster k.
# NOTE(review): the original accumulated into an `SC` that was never created,
# used colSums() (one total per series rather than one aggregate curve) and
# read the singleton case from an undefined `synchros09`. The intent is
# assumed to be the per-cluster aggregate curve over all rows of SME --
# confirm.
SC <- matrix(0, nrow = K, ncol = nrow(SME))
for (k in seq_len(K)) {
  clustk <- which(clustfactor == k)
  if (length(clustk) > 0) {
    # drop = FALSE keeps a matrix even for a single-member cluster.
    SC[k, ] <- rowSums(SME[, clustk, drop = FALSE])
  }
}