X-Git-Url: https://git.auder.net/?p=agghoo.git;a=blobdiff_plain;f=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FcompareTo.R;fp=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FcompareTo.R;h=0000000000000000000000000000000000000000;hp=fe5b24d4be8a526bbbb4149d713490403f948cb3;hb=16906f6e8c432b811ddf99da1b18a2a357a75235;hpb=97f16440280a40a49c4898a75942e374880bfca3 diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R deleted file mode 100644 index fe5b24d..0000000 --- a/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R +++ /dev/null @@ -1,247 +0,0 @@ -#' standardCV_core -#' -#' Cross-validation method, added here as an example. -#' Parameters are described in ?agghoo and ?AgghooCV -standardCV_core <- function(data, target, task, gmodel, params, loss, CV) { - n <- nrow(data) - shuffle_inds <- NULL - if (CV$type == "vfold" && CV$shuffle) - shuffle_inds <- sample(n, n) - list_testinds <- list() - for (v in seq_len(CV$V)) - list_testinds[[v]] <- get_testIndices(n, CV, v, shuffle_inds) - gmodel <- agghoo::Model$new(data, target, task, gmodel, params) - best_error <- Inf - best_p <- NULL - for (p in seq_len(gmodel$nmodels)) { - error <- Reduce('+', lapply(seq_len(CV$V), function(v) { - testIdx <- list_testinds[[v]] - d <- splitTrainTest(data, target, testIdx) - model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p) - prediction <- model_pred(d$dataTest) - loss(prediction, d$targetTest) - }) ) - if (error <= best_error) { - if (error == best_error) - best_p[[length(best_p)+1]] <- p - else { - best_p <- list(p) - best_error <- error - } - } - } - chosenP <- best_p[[ sample(length(best_p), 1) ]] - list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP)) -} - -#' CVvoting_core -#' -#' "voting" cross-validation method, added here as an example. -#' Parameters are described in ?agghoo and ?AgghooCV -CVvoting_core <- function(data, target, task, gmodel, params, loss, CV) { - CV <- checkCV(CV) - n <- nrow(data) - shuffle_inds <- NULL - if (CV$type == "vfold" && CV$shuffle) - shuffle_inds <- sample(n, n) - gmodel <- agghoo::Model$new(data, target, task, gmodel, params) - bestP <- rep(0, gmodel$nmodels) - for (v in seq_len(CV$V)) { - test_indices <- get_testIndices(n, CV, v, shuffle_inds) - d <- splitTrainTest(data, target, test_indices) - best_p <- NULL - best_error <- Inf - for (p in seq_len(gmodel$nmodels)) { - model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p) - prediction <- model_pred(d$dataTest) - error <- loss(prediction, d$targetTest) - if (error <= best_error) { - if (error == best_error) - best_p[[length(best_p)+1]] <- p - else { - best_p <- list(p) - best_error <- error - } - } - } - for (p in best_p) - bestP[p] <- bestP[p] + 1 - } - # Choose a param at random in case of ex-aequos: - maxP <- max(bestP) - chosenP <- sample(which(bestP == maxP), 1) - list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP)) -} - -#' standardCV_run -#' -#' Run and eval the standard cross-validation procedure. -#' Parameters are rather explicit except "floss", which corresponds to the -#' "final" loss function, applied to compute the error on testing dataset. -standardCV_run <- function( - dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... -) { - args <- list(...) - task <- checkTask(args$task, targetTrain) - modPar <- checkModPar(args$gmodel, args$params) - loss <- checkLoss(args$loss, task) - CV <- checkCV(args$CV) - s <- standardCV_core( - dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV) - if (verbose) - print(paste( "Parameter:", s$param )) - p <- s$model(dataTest) - err <- floss(p, targetTest) - if (verbose) - print(paste("error CV:", err)) - invisible(err) -} - -#' CVvoting_run -#' -#' Run and eval the voting cross-validation procedure. -#' Parameters are rather explicit except "floss", which corresponds to the -#' "final" loss function, applied to compute the error on testing dataset. -CVvoting_run <- function( - dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... -) { - args <- list(...) - task <- checkTask(args$task, targetTrain) - modPar <- checkModPar(args$gmodel, args$params) - loss <- checkLoss(args$loss, task) - CV <- checkCV(args$CV) - s <- CVvoting_core( - dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV) - if (verbose) - print(paste( "Parameter:", s$param )) - p <- s$model(dataTest) - err <- floss(p, targetTest) - if (verbose) - print(paste("error CV:", err)) - invisible(err) -} - -#' agghoo_run -#' -#' Run and eval the agghoo procedure. -#' Parameters are rather explicit except "floss", which corresponds to the -#' "final" loss function, applied to compute the error on testing dataset. -agghoo_run <- function( - dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... -) { - args <- list(...) - CV <- checkCV(args$CV) - # Must remove CV arg, or agghoo will complain "error: unused arg" - args$CV <- NULL - a <- do.call(agghoo, c(list(data=dataTrain, target=targetTrain), args)) - a$fit(CV) - if (verbose) { - print("Parameters:") - print(unlist(a$getParams())) - } - pa <- a$predict(dataTest) - err <- floss(pa, targetTest) - if (verbose) - print(paste("error agghoo:", err)) - invisible(err) -} - -#' compareTo -#' -#' Compare a list of learning methods (or run only one), on data/target. -#' -#' @param data Data matrix or data.frame -#' @param target Target vector (generally) -#' @param method_s Either a single function, or a list -#' (examples: agghoo_run, standardCV_run) -#' @param rseed Seed of the random generator (-1 means "random seed") -#' @param floss Loss function to compute the error on testing dataset. -#' @param verbose TRUE to request methods to be verbose. -#' @param ... arguments passed to method_s function(s) -#' -#' @export -compareTo <- function( - data, target, method_s, rseed=-1, floss=NULL, verbose=TRUE, ... -) { - if (rseed >= 0) - set.seed(rseed) - n <- nrow(data) - test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) ) - d <- splitTrainTest(data, target, test_indices) - - # Set error function to be used on model outputs (not in core method) - task <- checkTask(list(...)$task, target) - if (is.null(floss)) { - floss <- function(y1, y2) { - ifelse(task == "classification", mean(y1 != y2), mean(abs(y1 - y2))) - } - } - - # Run (and compare) all methods: - runOne <- function(o) { - o(d$dataTrain, d$dataTest, d$targetTrain, d$targetTest, floss, verbose, ...) - } - errors <- c() - if (is.list(method_s)) - errors <- sapply(method_s, runOne) - else if (is.function(method_s)) - errors <- runOne(method_s) - invisible(errors) -} - -#' compareMulti -#' -#' Run compareTo N times in parallel. -#' -#' @inheritParams compareTo -#' @param N Number of calls to method(s) -#' @param nc Number of cores. Set to parallel::detectCores() if undefined. -#' Set it to any value <=1 to say "no parallelism". -#' @param verbose TRUE to print task numbers and "Errors:" in the end. -#' -#' @export -compareMulti <- function( - data, target, method_s, N=100, nc=NA, floss=NULL, verbose=TRUE, ... -) { - base::require(parallel) - if (is.na(nc)) - nc <- parallel::detectCores() - - # "One" comparison for each method in method_s (list) - compareOne <- function(n) { - if (verbose) - print(n) - compareTo(data, target, method_s, n, floss, verbose=FALSE, ...) - } - - errors <- if (nc >= 2) { - parallel::mclapply(1:N, compareOne, mc.cores = nc) - } else { - lapply(1:N, compareOne) - } - if (verbose) - print("Errors:") - Reduce('+', errors) / N -} - -#' compareRange -#' -#' Run compareMulti on several values of the parameter V. -#' -#' @inheritParams compareMulti -#' @param V_range Values of V to be tested. -#' -#' @export -compareRange <- function( - data, target, method_s, N=100, nc=NA, floss=NULL, V_range=c(10,15,20), ... -) { - args <- list(...) - # Avoid warnings if V is left unspecified: - CV <- suppressWarnings( checkCV(args$CV) ) - errors <- lapply(V_range, function(V) { - args$CV$V <- V - do.call(compareMulti, c(list(data=data, target=target, method_s=method_s, - N=N, nc=nc, floss=floss, verbose=F), args)) - }) - print(paste(V_range, errors)) -}