Commit | Line | Data |
---|---|---|
b7cd987d BA |
1 | library(RPostgreSQL) |
2 | library(data.table) | |
3 | ||
# ---- Settings ----------------------------------------------------------------
# Number of perturbed curves simulated per reference series; also used as the
# driver's default fetch size below.
nb_curves_per_request <- 5
# Scratch file that receives each batch of curves before it is loaded; it is
# resolved relative to the working directory set further down.
temp_file <- "tmp_curves_batch"
# Settings of the (disabled) synthetic-centroid variant, kept for reference:
#tot_nb_curves <- 25e3 # total number of curves
#dimension <- 17519 # number of sample points
#nb_clust <- 15 # number of clusters

# ---- Database connection -----------------------------------------------------
# NOTE(review): the credentials are hard-coded in the script; consider moving
# them to environment variables or a ~/.pgpass file.
driver <- PostgreSQL(fetch.default.rec = nb_curves_per_request)
con <- dbConnect(
  driver,
  user = "irsdi",
  password = "irsdi2017",
  host = "localhost",
  port = "5432",
  dbname = "edf25m"
)

# ---- Reference data ----------------------------------------------------------
# All relative paths (the CSV below and temp_file) live under ~/tmp/.
setwd("~/tmp/")
# One reference series per row; the loop further down treats the first column
# as the series identifier and the remaining columns as its sample points.
ref_centroids <- fread("2009_matrix-dt.csv")
# Synthetic alternative (disabled):
#ref_centroids <- sapply(1:nb_clust, function(k) cumsum(rnorm(dimension)))
18 | ||
#' Generate randomly perturbed copies of one reference curve.
#'
#' @param line Numeric vector or one-row matrix. The first element is the
#'   identifier of the reference series, the remaining elements are its
#'   sample points.
#' @param times Number of perturbed copies to generate (may be 0).
#' @return A data.frame with `times` rows: an integer column `ids` holding
#'   `line[1] + k * 1e6` for k = 0, ..., times - 1, followed by one column per
#'   sample point. Each value is the reference value multiplied by an
#'   independent uniform factor drawn from [0.95, 1.05]. Series are stored in
#'   rows and the result is a data.frame because that is what fwrite expects.
genRandCurves <- function(line, times) {
  curve <- as.vector(line[-1])
  nb_points <- length(curve)

  # Build the ids arithmetically instead of round-tripping through a
  # zero-padded string, and fail with a clear message when they cannot be
  # represented as R integers.
  raw_ids <- (seq_len(times) - 1) * 1e6 + line[1]
  ids_ok <- !anyNA(raw_ids) &&
    all(raw_ids == trunc(raw_ids)) &&
    all(abs(raw_ids) <= .Machine$integer.max)
  if (!ids_ok) {
    stop("curve ids must be whole numbers that fit in a 32-bit integer",
         call. = FALSE)
  }
  ids <- as.integer(raw_ids)

  # One row of multiplicative noise per simulated curve. Giving both
  # dimensions keeps the matrix well-formed when times or nb_points is 0.
  perturbances <- matrix(runif(nb_points * times, .95, 1.05),
                         nrow = times, ncol = nb_points)

  # Scale column j by curve[j]. The matrix is stored column-major, so
  # repeating each curve value `times` times lines it up with its column.
  # Unlike a row-wise apply(), this keeps the times x nb_points shape even
  # when the curve has a single sample point.
  data.frame(ids, perturbances * rep(curve, each = times))
}
30 | ||
# Main loop: for every reference series, simulate nb_curves_per_request
# perturbed curves, dump them to temp_file and bulk-load that file with a
# COPY command (expected to be faster than individual INSERTs). The elapsed
# time of the whole loop is measured with system.time().
#
# The loop runs inside tryCatch(finally = ...) so that the connection is
# closed and the temporary file removed even when an iteration fails.
tryCatch(
  system.time(
    for (i in seq_len(nrow(ref_centroids))) {
      curves <- genRandCurves(
        line = as.matrix(ref_centroids[i, ]),
        times = nb_curves_per_request
      )
      fwrite(curves, temp_file, append = FALSE, sep = ",", col.names = FALSE)

      # PostgreSQL array literals must be wrapped in braces ({v1,v2,...}).
      # Rewriting the file from R gives the same content as the former
      # `sed -i` call without depending on GNU sed or on shell quoting of
      # the file name.
      writeLines(paste0("{", readLines(temp_file), "}"), temp_file)

      # COPY ... FROM '<file>' is executed by the server, hence the absolute
      # path.
      query <- paste0(
        "COPY series (curve) FROM '", normalizePath(temp_file), "';"
      )
      # Clear the result right away: COPY returns no rows, and leaving the
      # result set open would leak one per iteration.
      dbClearResult(dbSendQuery(con, query))
    }
  ),
  finally = {
    dbDisconnect(con)
    unlink(temp_file)
  }
)