| 1 | library(RPostgreSQL) |
| 2 | library(data.table) |
| 3 | |
# ---- Settings ----
# Number of simulated curves generated per reference curve, i.e. the size of
# one batch sent to the database in a single request.
nb_curves_per_request <- 5
# Settings of a fully synthetic variant (currently disabled):
#tot_nb_curves <- 25e3 # total number of curves
#dimension <- 17519 # number of sample points
#nb_clust <- 15 # number of clusters
# Temporary file that holds the current batch of curves.
# NOTE(review): presumably it must be readable by the database server, since
# it is loaded with a server-side COPY -- confirm.
temp_file <- "tmp_curves_batch"

# ---- Database connection ----
# NOTE(review): the credentials below are hard-coded in the script.
driver <- PostgreSQL(fetch.default.rec = nb_curves_per_request)
con <- dbConnect(
  driver,
  user = "irsdi",
  password = "irsdi2017",
  host = "localhost",
  port = "5432",
  dbname = "edf25m"
)

# ---- Reference curves ----
# From here on, relative paths (the CSV below and temp_file) resolve against
# this directory.
setwd("~/tmp/")
ref_centroids <- fread("2009_matrix-dt.csv")
# Synthetic alternative: random-walk centroids instead of the CSV data.
#ref_centroids <- sapply(1:nb_clust, function(k) cumsum(rnorm(dimension)))
| 18 | |
#' Simulate noisy copies of one reference curve
#'
#' Every simulated curve is the reference curve with each sample point
#' multiplied by an independent uniform factor drawn from [0.95, 1.05].
#'
#' @param line Numeric vector (or one-row matrix). Its first element is the
#'   identifier of the reference curve (it has to be integer-valued for the
#'   `%i` conversion); the remaining elements are the sample points.
#' @param times Number of simulated curves to generate.
#' @return A data.frame with `times` rows, one simulated curve per row. The
#'   first column, `ids`, holds the integer identifier
#'   `(k - 1) * 1e6 + line[1]` of the k-th copy; the remaining columns hold
#'   the perturbed sample points.
genRandCurves <- function(line, times) {
  # Copy k gets the reference id shifted by (k - 1) million. The sprintf()
  # round trip is kept from the original implementation: it zero-pads the id
  # and as.integer() strips the padding again.
  #ids <- as.integer(sprintf("%04i", seq_len(times) - 1)) * 1e6 + line[1]
  ids <- as.integer(sprintf("%010i", (seq_len(times) - 1) * 1e6 + line[1]))
  base_curve <- as.vector(line[-1])
  # One row of multiplicative noise per simulated curve.
  perturbances <- matrix(
    runif(length(base_curve) * times, .95, 1.05),
    nrow = times
  )
  # Scale column j of the noise by sample point j of the reference curve.
  # rep(..., each = times) lines the curve up with the column-major storage
  # of the matrix, so a single vectorised product replaces the former
  # row-wise apply() followed by t().
  curves_sim <- perturbances * rep(base_curve, each = times)
  # One series per row, returned as a data.frame (what fwrite() expects); the
  # id column is named explicitly so it does not depend on a local name.
  data.frame(ids = ids, curves_sim)
}
| 30 | |
# Main loop: for every reference curve, simulate nb_curves_per_request noisy
# copies, write them to temp_file and bulk-load that file with a COPY command
# (should be faster than INSERT). system.time() reports the duration of the
# whole load. The connection and the temporary file are released in the
# `finally` clause, so they are cleaned up even when an iteration fails.
tryCatch(
  system.time(
    for (i in seq_len(nrow(ref_centroids))) {
      curves <- genRandCurves(
        line = as.matrix(ref_centroids[i, ]),
        times = nb_curves_per_request
      )
      fwrite(curves, temp_file, append = FALSE, sep = ",", col.names = FALSE)
      # Required hack: PostgreSQL array literals are wrapped in braces, so
      # every line "a,b,c" is rewritten as "{a,b,c}". This is done in R
      # instead of shelling out to `sed -i`, which is not portable and breaks
      # on paths containing spaces or shell metacharacters.
      batch_lines <- readLines(temp_file)
      if (length(batch_lines) > 0) {
        batch_lines <- paste0("{", batch_lines, "}")
      }
      writeLines(batch_lines, temp_file)
      # Double any single quote so the path stays a valid SQL string literal.
      copy_path <- gsub("'", "''", normalizePath(temp_file), fixed = TRUE)
      query <- paste0("COPY series (curve) FROM '", copy_path, "';")
      # COPY yields no rows; clear the result right away so that result
      # handles do not accumulate on the connection across iterations.
      dbClearResult(dbSendQuery(con, query))
    }
  ),
  finally = {
    dbDisconnect(con)
    unlink(temp_file)
  }
)