# rseed: a value >= 0 gives a reproducible run; a negative value leaves the RNG unseeded.
compareToCV(data, target_column_index, rseed = -1)
+
+# Average over N runs:
+
+> compareMulti(iris, 5, N=100)
+[1] "error agghoo vs. cross-validation:"
+[1] 0.04266667 0.04566667
+
+> compareMulti(PimaIndiansDiabetes, 9, N=100)
+[1] "error agghoo vs. cross-validation:"
+[1] 0.2579221 0.2645455
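+
+# PimaIndiansDiabetes comes from the mlbench package, so load it first:
+# library(mlbench); data(PimaIndiansDiabetes)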
  # Break ties by returning one of the equally good models at random:
  best_model[[ sample(length(best_model), 1) ]]
}
-compareToCV <- function(df, t_idx, task=NULL, rseed=-1, verbose=TRUE) {
+compareToCV <- function(df, t_idx, task=NULL, rseed=-1, verbose=TRUE, ...) {
if (rseed >= 0)
set.seed(rseed)
  # Infer the task from the target column: numeric means regression.
  if (is.null(task))
    task <- ifelse(is.numeric(df[,t_idx]), "regression", "classification")
  n <- nrow(df)
  # Hold out 10% of the rows as a test set when n >= 500, else 20%.
  test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) )
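+  # Extra arguments in '...' are forwarded unchanged to agghoo() here and
+  # to standardCV() below, so both methods run with the same settings.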
- a <- agghoo(df[-test_indices,-t_idx], df[-test_indices,t_idx], task)
+ a <- agghoo(df[-test_indices,-t_idx], df[-test_indices,t_idx], task, ...)
a$fit()
  if (verbose) {
    print("Parameters:")
    print(a$getParams()) # assumed accessor for the selected parameters
  }
  pa <- a$predict(df[test_indices,-t_idx])
  err_a <- ifelse(task == "classification", mean(pa != df[test_indices,t_idx]),
    mean(abs(pa - df[test_indices,t_idx])))
  if (verbose)
    print(paste("error agghoo:", err_a))
# Compare with standard cross-validation:
- s <- standardCV(df[-test_indices,-t_idx], df[-test_indices,t_idx], task)
+ s <- standardCV(df[-test_indices,-t_idx], df[-test_indices,t_idx], task, ...)
if (verbose)
print(paste( "Parameter:", s$param ))
  ps <- s$model(df[test_indices,-t_idx])
  err_s <- ifelse(task == "classification", mean(ps != df[test_indices,t_idx]),
    mean(abs(ps - df[test_indices,t_idx])))
if (verbose)
print(paste("error CV:", err_s))
- c(err_a, err_s)
+ invisible(c(err_a, err_s))
}
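+# Example (sketch): iris ships with R, and column 5 (Species) selects the
+# classification path. The error pair is returned invisibly, so capture it:
+#   errs <- compareToCV(iris, 5, rseed = 0) # c(err_agghoo, err_CV)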
library(parallel)
-compareMulti <- function(df, t_idx, task = NULL, N = 100, nc = NA) {
+compareMulti <- function(df, t_idx, task = NULL, N = 100, nc = NA, ...) {
  # Default to every available core.
  if (is.na(nc))
    nc <- detectCores()
errors <- mclapply(1:N,
function(n) {
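+      # The iteration index doubles as the seed: each run gets a distinct
+      # split, yet the whole experiment stays reproducible.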
- compareToCV(df, t_idx, task, n, verbose=FALSE) },
+ compareToCV(df, t_idx, task, n, verbose=FALSE, ...) },
mc.cores = nc)
print("error agghoo vs. cross-validation:")
  # Element-wise average of the N c(err_agghoo, err_CV) pairs.
  Reduce('+', errors) / N
}
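+# Note: mclapply() relies on forking, which Windows lacks; keep nc = 1
+# there (parallel::parLapply() is the portable alternative). Example:
+#   compareMulti(iris, 5, N = 100, nc = 2)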