# Fits an rpart tree on (dataHO, targetHO) with complexity parameter `param`
# and returns a function that predicts on new inputs X.
# `task` is not an argument: it is read from the enclosing scope.
function(dataHO, targetHO, param) {
require(rpart)
# rpart method: "class" when task is "classification", "anova" otherwise.
method <- ifelse(task == "classification", "class", "anova")
# Inputs without column names get V1, V2, ... so the formula below has
# predictor names to work with.
+ if (is.null(colnames(dataHO)))
+ colnames(dataHO) <- paste0("V", 1:ncol(dataHO))
# The target is bound in as a column named "target" and regressed on all others.
df <- data.frame(cbind(dataHO, target=targetHO))
model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param))
- function(X) predict(model, X)
# Returned predictor: applies the same V1, V2, ... naming to unnamed X and
# converts X to a data.frame before calling predict().
+ function(X) {
+ if (is.null(colnames(X)))
+ colnames(X) <- paste0("V", 1:ncol(X))
+ predict(model, as.data.frame(X))
+ }
}
}
else if (family == "rf") {
# Builds the candidate `cp` values for this family from the cptable of one
# rpart classification tree fitted on (data, target) from the enclosing scope.
# Returns at most about 11 values; stops with an error when there are none.
require(rpart)
df <- data.frame(cbind(data, target=target))
# Loosest growth settings (cp = 0, minsplit = 2, minbucket = 1) with rpart's
# internal cross-validation switched off (xval = 0).
ctrl <- list(
+ cp = 0,
minsplit = 2,
minbucket = 1,
- maxcompete = 0,
- maxsurrogate = 0,
- usesurrogate = 0,
- xval = 0,
- surrogatestyle = 0,
- maxdepth = 30)
+ xval = 0)
r <- rpart(target ~ ., df, method="class", control=ctrl)
# First column of the cptable, with its first row dropped.
cps <- r$cptable[-1,1]
- if (length(cps) <= 11)
+ if (length(cps) <= 11) {
# Fixed: compare the length to zero. `length(cps == 0)` is just `length(cps)`,
# so the previous test raised the error whenever candidates existed and let
# the empty case through.
+ if (length(cps) == 0)
+ stop("No cross-validation possible: select another model")
return (cps)
+ }
# More than 11 candidates: keep roughly 11 evenly spaced ones (rounded
# positions from first to last, duplicates removed).
step <- (length(cps) - 1) / 10
cps[unique(round(seq(1, length(cps), step)))]
}
# Fragment (enclosing function header not visible here): one random
# train/test split of `df`, then agghoo and standardCV are each fitted on the
# training part and scored on the held-out part. `t_idx` is the target column.
# Task is derived from the target column: numeric -> regression, otherwise
# classification.
task <- ifelse(is.numeric(df[,t_idx]), "regression", "classification")
n <- nrow(df)
# Held-out size: n/10 rows when n >= 500, n/5 rows otherwise (rounded).
test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) )
- a <- agghoo(df[-test_indices,-t_idx], df[-test_indices,t_idx], task, ...)
# Predictors are converted to matrices once and reused for both methods.
+ data <- as.matrix(df[-test_indices,-t_idx])
+ target <- df[-test_indices,t_idx]
+ test <- as.matrix(df[test_indices,-t_idx])
+ a <- agghoo(data, target, task, ...)
a$fit()
if (verbose) {
print("Parameters:")
print(unlist(a$getParams()))
}
- pa <- a$predict(df[test_indices,-t_idx])
+ pa <- a$predict(test)
# Error: misclassification rate for classification, mean absolute error
# for regression.
err_a <- ifelse(task == "classification",
mean(pa != df[test_indices,t_idx]),
mean(abs(pa - df[test_indices,t_idx])))
if (verbose)
print(paste("error agghoo:", err_a))
# Compare with standard cross-validation:
- s <- standardCV(df[-test_indices,-t_idx], df[-test_indices,t_idx], task, ...)
+ s <- standardCV(data, target, task, ...)
if (verbose)
print(paste( "Parameter:", s$param ))
- ps <- s$model(df[test_indices,-t_idx])
+ ps <- s$model(test)
# Same error measure as for agghoo above.
err_s <- ifelse(task == "classification",
mean(ps != df[test_indices,t_idx]),
mean(abs(ps - df[test_indices,t_idx])))
# Runs compareToCV N times and averages the results.
#
# df, t_idx, task, ...: forwarded unchanged to compareToCV.
# N: number of runs. The run index (1..N) is passed as compareToCV's fourth
#   positional argument (presumably a random seed; confirm against compareToCV).
# nc: number of cores; NA means "use detectCores()".
#
# Prints a header line, then returns the element-wise sum of the N results
# divided by N.
compareMulti <- function(df, t_idx, task = NULL, N = 100, nc = NA, ...) {
# NOTE(review): if detectCores() itself yields NA, the `nc >= 2` test below
# cannot be evaluated by `if`; confirm whether a fallback to 1 is wanted.
if (is.na(nc))
nc <- detectCores()
- errors <- mclapply(1:N,
- function(n) {
- compareToCV(df, t_idx, task, n, verbose=FALSE, ...) },
- mc.cores = nc)
# One run: prints the run index, then calls compareToCV with verbose output off.
+ compareOne <- function(n) {
+ print(n)
+ compareToCV(df, t_idx, task, n, verbose=FALSE, ...)
+ }
# Parallel via mclapply only when at least two cores are requested;
# otherwise a plain sequential lapply.
+ errors <- if (nc >= 2) {
+ mclapply(1:N, compareOne, mc.cores = nc)
+ } else {
+ lapply(1:N, compareOne)
+ }
print("error agghoo vs. cross-validation:")
Reduce('+', errors) / N
}