First commit
[epclust.git] / data / curves_to_db / curves2db_jc.R
1 library(RPostgreSQL)
2 library(data.table)
3
# ---- Configuration ----
nb_curves_per_request <- 5 # curves per (insert) request
# Parameters of a fully synthetic variant (kept for reference, unused here):
#tot_nb_curves <- 25e3 # total number of curves
#dimension <- 17519 # number of sample points
#nb_clust <- 15 # number of clusters
temp_file <- "tmp_curves_batch" # (accessible) temporary file to store curves

# ---- Init connection with DB ----
# Connection settings may be overridden through the standard libpq environment
# variables (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE); when a variable
# is not set, the historical hard-coded value is used so the script keeps
# working unchanged.
# NOTE(review): the fallback password is still stored in the source; move it
# to the environment (or ~/.pgpass) and drop the default once deployments
# have been updated.
driver <- PostgreSQL(fetch.default.rec = nb_curves_per_request)
con <- dbConnect(
  driver,
  user = Sys.getenv("PGUSER", unset = "irsdi"),
  password = Sys.getenv("PGPASSWORD", unset = "irsdi2017"),
  host = Sys.getenv("PGHOST", unset = "localhost"),
  port = Sys.getenv("PGPORT", unset = "5432"),
  dbname = Sys.getenv("PGDATABASE", unset = "edf25m")
)

# ---- Reference curves ----
# The working directory is changed on purpose: both the input CSV and
# temp_file are relative paths resolved against it.
setwd("~/tmp/")
# One reference curve per row; the first column is used as the curve id by
# genRandCurves(), the remaining columns as its sample points.
ref_centroids <- fread("2009_matrix-dt.csv")
#ref_centroids <- sapply(1:nb_clust, function(k) cumsum(rnorm(dimension)))
18
# Simulate `times` randomly perturbed copies of one reference curve.
#
# Args:
#   line:  numeric vector (or 1-row matrix) whose first element is the id of
#          the reference curve and whose remaining elements are its samples.
#   times: number of perturbed copies to generate.
#
# Returns a data.frame with one simulated series per row: the first column
# (`ids`) holds the integer id of the copy, (k - 1) * 1e6 + reference id for
# the k-th copy, and the remaining columns hold the reference samples, each
# multiplied by an independent uniform factor drawn from [0.95, 1.05].
genRandCurves <- function(line, times) {
  base_id <- line[1]
  # A non-integer id has always been rejected; keep failing loudly instead of
  # letting as.integer() truncate it silently.
  if (!is.na(base_id) && base_id != trunc(base_id)) {
    stop("curve id must be a whole number", call. = FALSE)
  }
  ids <- as.integer((seq_len(times) - 1) * 1e6 + base_id)

  curve <- as.numeric(line[-1])
  # One row of multiplicative noise per simulated series.
  perturbances <- matrix(runif(length(curve) * times, .95, 1.05), nrow = times)
  # Scale column j by curve[j]. Unlike a row-wise apply() followed by t(),
  # this keeps the times x length(curve) shape even for a single-point curve.
  curves_sim <- data.frame(ids, sweep(perturbances, 2, curve, "*"))
  curves_sim
}
30
# Loop: for every reference curve, generate nb_curves_per_request perturbed
# copies, store them in temp_file, and load that file into the DB with a COPY
# command (should be faster than INSERT).
# The whole loop is timed; tryCatch(finally = ) guarantees that the connection
# is closed and the temporary file removed even when an iteration fails.
tryCatch(
  system.time(
    for (i in seq_len(nrow(ref_centroids))) {
      curves <- genRandCurves(
        line = as.matrix(ref_centroids[i, ]),
        times = nb_curves_per_request
      )
      fwrite(curves, temp_file, append = FALSE, sep = ",", col.names = FALSE)
      # Required hack: wrap every row in braces (PostgreSQL array literal
      # syntax). Done in R rather than through `sed -i`, so it does not depend
      # on an external tool or on shell quoting of the file name.
      writeLines(paste0("{", readLines(temp_file), "}"), temp_file)
      # Double any single quote so the path is a valid SQL string literal.
      copy_path <- gsub("'", "''", normalizePath(temp_file), fixed = TRUE)
      query <- paste0("COPY series (curve) FROM '", copy_path, "';")
      # Release the result set right away; otherwise one leaks per iteration.
      dbClearResult(dbSendQuery(con, query))
    }
  ),
  finally = {
    dbDisconnect(con)
    unlink(temp_file)
  }
)