Commit | Line | Data |
---|---|---|
b7cd987d BA |
1 | require(RPostgreSQL) |
2 | require(data.table) | |
3 | ||
# ---- Script parameters ----
# Batch size: how many curves go into a single (insert) request.
nb_curves_per_request <- 100
# Total number of curves to generate.
tot_nb_curves <- 25000
# Number of sample points per curve.
dimension <- 15000
# Number of clusters (reference centroids).
nb_clust <- 15
# (Accessible) temporary file in which each batch of curves is stored.
temp_file <- "tmp_curves_batch"
9 | ||
# Open the database connection used by the rest of the script.
# The driver's default fetch size is set to the insert batch size.
driver <- PostgreSQL(fetch.default.rec = nb_curves_per_request)
con <- dbConnect(
  driver,
  host = "localhost",
  port = "5432",
  dbname = "db",
  user = "user",
  password = "pwd"
)
14 | ||
# Reference centroids: one Gaussian random walk of `dimension` points per
# cluster, stored column-wise (a `dimension` x `nb_clust` matrix).
# Replace this call and genRandCurves() with any custom initialization.
# vapply() pins the shape of every column, and seq_len() avoids the
# `1:0` pitfall when nb_clust is 0.
ref_centroids <- vapply(
  seq_len(nb_clust),
  function(k) cumsum(rnorm(dimension)),
  numeric(dimension)
)
# Generate one noisy random curve per requested index.
#
# Each curve is a randomly chosen column of `ref_centroids` plus standard
# Gaussian noise on each of its `dimension` sample points. Indices greater
# than `tot_nb_curves` are skipped, so a partial batch simply yields fewer
# curves instead of failing.
#
# @param indices Numeric vector of curve indices.
# @return An unnamed list of `dimension` numeric vectors; element `i` holds
#   the i-th sample point of every generated curve. fwrite() writes list
#   elements as columns, so each output row is one curve.
genRandCurves <- function(indices) {
  # Drop out-of-range indices up front: returning NULL from inside the apply
  # call would produce a ragged list rather than a matrix.
  kept <- indices[indices <= tot_nb_curves]
  values <- vapply(kept, function(i) {
    j <- sample(ncol(ref_centroids), 1)
    ref_centroids[, j] + rnorm(dimension)
  }, numeric(dimension))
  # Force a dimension x n matrix even when n is 0 or dimension is 1.
  mat <- matrix(values, nrow = dimension, ncol = length(kept))
  # fwrite() writes per column => hand it the rows of mat as columns.
  lapply(seq_len(dimension), function(i) mat[i, ])
}
27 | ||
# Main loop: generate curves in batches of nb_curves_per_request, store each
# batch in temp_file, and load it into the `curve` column of table `series`
# with COPY (should be faster than INSERT).
# NOTE(review): COPY ... FROM '<path>' is presumably read by the database
# server, so temp_file must be readable from there -- confirm for remote hosts.
nb_curves <- 0
tryCatch({
  while (nb_curves < tot_nb_curves) {
    # Clamp the final batch so no index beyond tot_nb_curves is requested.
    last_index <- min(nb_curves + nb_curves_per_request, tot_nb_curves)
    curves <- genRandCurves((nb_curves + 1):last_index)
    # One curve per row, no header line: COPY expects data rows only.
    fwrite(curves, temp_file, append = FALSE, sep = ",", col.names = FALSE)
    # Wrap every row in braces (PostgreSQL array literal syntax). Done in R
    # rather than through `sed -i`, whose flags differ between platforms.
    writeLines(paste0("{", readLines(temp_file), "}"), temp_file)
    # Double single quotes so the path stays a valid SQL string literal.
    file_path <- gsub("'", "''", normalizePath(temp_file), fixed = TRUE)
    query <- paste0("COPY series (curve) FROM '", file_path, "';")
    # Release the result set right away; otherwise one leaks per batch.
    dbClearResult(dbSendQuery(con, query))
    nb_curves <- last_index
  }
}, finally = {
  # Always release the connection and remove the temp file, even on error.
  dbDisconnect(con)
  unlink(temp_file)
})