From: Benjamin Auder Date: Fri, 9 Sep 2022 15:49:44 +0000 (+0200) Subject: Update package to send on CRAN X-Git-Url: https://git.auder.net/doc/html/packages.html?a=commitdiff_plain;h=97f16440280a40a49c4898a75942e374880bfca3;p=agghoo.git Update package to send on CRAN --- diff --git a/DESCRIPTION b/DESCRIPTION index 5e85d59..140abb3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: agghoo -Title: Aggregated Hold-out Cross Validation -Date: 2021-06-05 +Title: Aggregated Hold-Out Cross Validation +Date: 2022-08-30 Version: 0.1-0 Description: The 'agghoo' procedure is an alternative to usual cross-validation. Instead of choosing the best model trained on V subsamples, it determines @@ -17,15 +17,16 @@ Maintainer: Benjamin Auder Depends: R (>= 3.5.0) Imports: + class, + parallel, R6, rpart, - randomForest, FNN Suggests: roxygen2 URL: https://git.auder.net/?p=agghoo.git License: MIT + file LICENSE -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 Collate: 'compareTo.R' 'agghoo.R' diff --git a/LICENSE b/LICENSE index 6e92110..094ff81 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2021 +YEAR: 2021-2022 COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard diff --git a/NAMESPACE b/NAMESPACE index 74d8bd5..7bbddef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,11 @@ # Generated by roxygen2: do not edit by hand export(AgghooCV) -export(CVvoting_run) export(Model) export(agghoo) -export(agghoo_run) export(compareMulti) export(compareRange) export(compareTo) -export(standardCV_run) importFrom(FNN,knn.reg) importFrom(R6,R6Class) importFrom(class,knn) diff --git a/R/R6_Model.R b/R/R6_Model.R index 1719666..d48825e 100644 --- a/R/R6_Model.R +++ b/R/R6_Model.R @@ -68,7 +68,7 @@ Model <- R6::R6Class("Model", getGmodel = function(family, task) { if (family == "tree") { function(dataHO, targetHO, param) { - require(rpart) + base::require(rpart) method <- ifelse(task == "classification", "class", "anova") if (is.null(colnames(dataHO))) colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) @@ -98,13 +98,13 @@ Model <- R6::R6Class("Model", else if (family == "knn") { if (task == "classification") { function(dataHO, targetHO, param) { - require(class) + base::require(class) function(X) class::knn(dataHO, X, cl=targetHO, k=param) } } else { function(dataHO, targetHO, param) { - require(FNN) + base::require(FNN) function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred } } @@ -114,7 +114,7 @@ Model <- R6::R6Class("Model", getParams = function(family, data, target, task) { if (family == "tree") { # Run rpart once to obtain a CV grid for parameter cp - require(rpart) + base::require(rpart) df <- data.frame(cbind(data, target=target)) ctrl <- list( cp = 0, diff --git a/R/compareTo.R b/R/compareTo.R index 28cb711..fe5b24d 100644 --- a/R/compareTo.R +++ b/R/compareTo.R @@ -78,8 +78,6 @@ CVvoting_core <- function(data, target, task, gmodel, params, loss, CV) { #' Run and eval the standard cross-validation procedure. #' Parameters are rather explicit except "floss", which corresponds to the #' "final" loss function, applied to compute the error on testing dataset. -#' -#' @export standardCV_run <- function( dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... ) { @@ -104,8 +102,6 @@ standardCV_run <- function( #' Run and eval the voting cross-validation procedure. #' Parameters are rather explicit except "floss", which corresponds to the #' "final" loss function, applied to compute the error on testing dataset. -#' -#' @export CVvoting_run <- function( dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... ) { @@ -130,8 +126,6 @@ CVvoting_run <- function( #' Run and eval the agghoo procedure. #' Parameters are rather explicit except "floss", which corresponds to the #' "final" loss function, applied to compute the error on testing dataset. -#' -#' @export agghoo_run <- function( dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... ) { @@ -209,7 +203,7 @@ compareTo <- function( compareMulti <- function( data, target, method_s, N=100, nc=NA, floss=NULL, verbose=TRUE, ... ) { - require(parallel) + base::require(parallel) if (is.na(nc)) nc <- parallel::detectCores() diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/DESCRIPTION b/agghoo.Rcheck/00_pkg_src/agghoo/DESCRIPTION new file mode 100644 index 0000000..21f9ca3 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/DESCRIPTION @@ -0,0 +1,26 @@ +Package: agghoo +Title: Aggregated Hold-Out Cross Validation +Date: 2022-08-30 +Version: 0.1-0 +Description: The 'agghoo' procedure is an alternative to usual cross-validation. + Instead of choosing the best model trained on V subsamples, it determines + a winner model for each subsample, and then aggregate the V outputs. + For the details, see "Aggregated hold-out" by Guillaume Maillard, + Sylvain Arlot, Matthieu Lerasle (2021) + published in Journal of Machine Learning Research 22(20):1--55. +Author: Sylvain Arlot [cph,ctb], + Benjamin Auder [aut,cre,cph], + Melina Gallopin [cph,ctb], + Matthieu Lerasle [cph,ctb], + Guillaume Maillard [cph,ctb] +Maintainer: Benjamin Auder +Depends: R (>= 3.5.0) +Imports: class, parallel, R6, rpart, FNN +Suggests: roxygen2 +URL: https://git.auder.net/?p=agghoo.git +License: MIT + file LICENSE +RoxygenNote: 7.2.1 +Collate: 'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R' + 'checks.R' 'utils.R' 'A_NAMESPACE.R' +NeedsCompilation: no +Packaged: 2022-09-09 15:45:56 UTC; auder diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/LICENSE b/agghoo.Rcheck/00_pkg_src/agghoo/LICENSE new file mode 100644 index 0000000..094ff81 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021-2022 +COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/NAMESPACE b/agghoo.Rcheck/00_pkg_src/agghoo/NAMESPACE new file mode 100644 index 0000000..7bbddef --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/NAMESPACE @@ -0,0 +1,13 @@ +# Generated by roxygen2: do not edit by hand + +export(AgghooCV) +export(Model) +export(agghoo) +export(compareMulti) +export(compareRange) +export(compareTo) +importFrom(FNN,knn.reg) +importFrom(R6,R6Class) +importFrom(class,knn) +importFrom(rpart,rpart) +importFrom(stats,ppr) diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/A_NAMESPACE.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/A_NAMESPACE.R new file mode 100644 index 0000000..0466833 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/A_NAMESPACE.R @@ -0,0 +1,7 @@ +#' @include utils.R +#' @include checks.R +#' @include R6_Model.R +#' @include R6_AgghooCV.R +#' @include agghoo.R +#' @include compareTo.R +NULL diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_AgghooCV.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_AgghooCV.R new file mode 100644 index 0000000..328c141 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_AgghooCV.R @@ -0,0 +1,115 @@ +#' @title R6 class with agghoo functions fit() and predict(). +#' +#' @description +#' Class encapsulating the methods to run to obtain the best predictor +#' from the list of models (see 'Model' class). +#' +#' @importFrom R6 R6Class +#' +#' @export +AgghooCV <- R6::R6Class("AgghooCV", + public = list( + #' @description Create a new AgghooCV object. + #' @param data Matrix or data.frame + #' @param target Vector of targets (generally numeric or factor) + #' @param task "regression" or "classification". + #' Default: classification if target not numeric. + #' @param gmodel Generic model returning a predictive function + #' Default: tree if mixed data, knn/ppr otherwise. + #' @param loss Function assessing the error of a prediction + #' Default: error rate or mean(abs(error)). + initialize = function(data, target, task, gmodel, loss) { + private$data <- data + private$target <- target + private$task <- task + private$gmodel <- gmodel + private$loss <- loss + }, + #' @description Fit an agghoo model. + #' @param CV List describing cross-validation to run. Slots: \cr + #' - type: 'vfold' or 'MC' for Monte-Carlo (default: MC) \cr + #' - V: number of runs (default: 10) \cr + #' - test_size: percentage of data in the test dataset, for MC + #' (irrelevant for V-fold). Default: 0.2. \cr + #' - shuffle: wether or not to shuffle data before V-fold. + #' Irrelevant for Monte-Carlo; default: TRUE \cr + #' Default (if NULL): type="MC", V=10, test_size=0.2 + fit = function(CV = NULL) { + CV <- checkCV(CV) + n <- nrow(private$data) + shuffle_inds <- NULL + if (CV$type == "vfold" && CV$shuffle) + shuffle_inds <- sample(n, n) + # Result: list of V predictive models (+ parameters for info) + private$pmodels <- list() + for (v in seq_len(CV$V)) { + # Prepare train / test data and target, from full dataset. + # dataHO: "data Hold-Out" etc. + test_indices <- get_testIndices(n, CV, v, shuffle_inds) + d <- splitTrainTest(private$data, private$target, test_indices) + best_model <- NULL + best_error <- Inf + for (p in seq_len(private$gmodel$nmodels)) { + model_pred <- private$gmodel$get(d$dataTrain, d$targetTrain, p) + prediction <- model_pred(d$dataTest) + error <- private$loss(prediction, d$targetTest) + if (error <= best_error) { + newModel <- list(model=model_pred, param=private$gmodel$getParam(p)) + if (error == best_error) + best_model[[length(best_model)+1]] <- newModel + else { + best_model <- list(newModel) + best_error <- error + } + } + } + # Choose a model at random in case of ex-aequos + private$pmodels[[v]] <- best_model[[ sample(length(best_model),1) ]] + } + }, + #' @description Predict an agghoo model (after calling fit()) + #' @param X Matrix or data.frame to predict + predict = function(X) { + if (!is.matrix(X) && !is.data.frame(X)) + stop("X: matrix or data.frame") + if (!is.list(private$pmodels)) { + print("Please call $fit() method first") + return (invisible(NULL)) + } + V <- length(private$pmodels) + oneLineX <- X[1,] + if (is.matrix(X)) + # HACK: R behaves differently with data frames and matrices. + oneLineX <- t(as.matrix(oneLineX)) + if (length(private$pmodels[[1]]$model(oneLineX)) >= 2) + # Soft classification: + return (Reduce("+", lapply(private$pmodels, function(m) m$model(X))) / V) + n <- nrow(X) + all_predictions <- as.data.frame(matrix(nrow=n, ncol=V)) + for (v in 1:V) + all_predictions[,v] <- private$pmodels[[v]]$model(X) + if (private$task == "regression") + # Easy case: just average each row + return (rowMeans(all_predictions)) + # "Hard" classification: + apply(all_predictions, 1, function(row) { + t <- table(row) + # Next lines in case of ties (broken at random) + tmax <- max(t) + sample( names(t)[which(t == tmax)], 1 ) + }) + }, + #' @description Return the list of V best parameters (after calling fit()) + getParams = function() { + lapply(private$pmodels, function(m) m$param) + } + ), + private = list( + data = NULL, + target = NULL, + task = NULL, + gmodel = NULL, + loss = NULL, + pmodels = NULL + ) +) diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R new file mode 100644 index 0000000..d48825e --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R @@ -0,0 +1,157 @@ +#' @title R6 class representing a (generic) model. +#' +#' @description +#' "Model" class, containing a (generic) learning function, which from +#' data + target [+ params] returns a prediction function X --> y. +#' Parameters for cross-validation are either provided or estimated. +#' Model family can be chosen among "tree", "ppr" and "knn" for now. +#' +#' @importFrom FNN knn.reg +#' @importFrom class knn +#' @importFrom stats ppr +#' @importFrom rpart rpart +#' +#' @export +Model <- R6::R6Class("Model", + public = list( + #' @field nmodels Number of parameters (= number of [predictive] models) + nmodels = NA, + #' @description Create a new generic model. + #' @param data Matrix or data.frame + #' @param target Vector of targets (generally numeric or factor) + #' @param task "regression" or "classification" + #' @param gmodel Generic model returning a predictive function; chosen + #' automatically given data and target nature if not provided. + #' @param params List of parameters for cross-validation (each defining a model) + initialize = function(data, target, task, gmodel = NULL, params = NULL) { + if (is.null(gmodel)) { + # (Generic) model not provided + all_numeric <- is.numeric(as.matrix(data)) + if (!all_numeric) + # At least one non-numeric column: use trees + gmodel = "tree" + else + # Numerical data + gmodel = ifelse(task == "regression", "ppr", "knn") + } + if (is.null(params)) + # Here, gmodel is a string (= its family), + # because a custom model must be given with its parameters. + params <- as.list(private$getParams(gmodel, data, target, task)) + private$params <- params + if (is.character(gmodel)) + gmodel <- private$getGmodel(gmodel, task) + private$gmodel <- gmodel + self$nmodels <- length(private$params) + }, + #' @description + #' Returns the model at index "index", trained on dataHO/targetHO. + #' @param dataHO Matrix or data.frame + #' @param targetHO Vector of targets (generally numeric or factor) + #' @param index Index of the model in 1...nmodels + get = function(dataHO, targetHO, index) { + private$gmodel(dataHO, targetHO, private$params[[index]]) + }, + #' @description + #' Returns the parameter at index "index". + #' @param index Index of the model in 1...nmodels + getParam = function(index) { + private$params[[index]] + } + ), + private = list( + # No need to expose model or parameters list + gmodel = NULL, + params = NULL, + # Main function: given a family, return a generic model, which in turn + # will output a predictive model from data + target + params. + getGmodel = function(family, task) { + if (family == "tree") { + function(dataHO, targetHO, param) { + base::require(rpart) + method <- ifelse(task == "classification", "class", "anova") + if (is.null(colnames(dataHO))) + colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) + df <- data.frame(cbind(dataHO, target=targetHO)) + model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param)) + if (task == "regression") + type <- "vector" + else { + if (is.null(dim(targetHO))) + type <- "class" + else + type <- "prob" + } + function(X) { + if (is.null(colnames(X))) + colnames(X) <- paste0("V", 1:ncol(X)) + predict(model, as.data.frame(X), type=type) + } + } + } + else if (family == "ppr") { + function(dataHO, targetHO, param) { + model <- stats::ppr(dataHO, targetHO, nterms=param) + function(X) predict(model, X) + } + } + else if (family == "knn") { + if (task == "classification") { + function(dataHO, targetHO, param) { + base::require(class) + function(X) class::knn(dataHO, X, cl=targetHO, k=param) + } + } + else { + function(dataHO, targetHO, param) { + base::require(FNN) + function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred + } + } + } + }, + # Return a default list of parameters, given a gmodel family + getParams = function(family, data, target, task) { + if (family == "tree") { + # Run rpart once to obtain a CV grid for parameter cp + base::require(rpart) + df <- data.frame(cbind(data, target=target)) + ctrl <- list( + cp = 0, + minsplit = 2, + minbucket = 1, + xval = 0) + method <- ifelse(task == "classification", "class", "anova") + r <- rpart(target ~ ., df, method=method, control=ctrl) + cps <- r$cptable[-1,1] + if (length(cps) <= 1) + stop("No cross-validation possible: select another model") + if (length(cps) <= 11) + return (cps) + step <- (length(cps) - 1) / 10 + cps[unique(round(seq(1, length(cps), step)))] + } + else if (family == "ppr") + # This is nterms in ppr() function + 1:10 + else if (family == "knn") { + n <- nrow(data) + # Choose ~10 NN values + K <- length(unique(target)) + if (n <= 10) + return (1:(n-1)) + sqrt_n <- sqrt(n) + step <- (2*sqrt_n - 1) / 10 + grid <- unique(round(seq(1, 2*sqrt_n, step))) + if (K == 2) { + # Common binary classification case: odd number of neighbors + for (i in 2:11) { + if (grid[i] %% 2 == 0) + grid[i] <- grid[i] + 1 #arbitrary choice + } + } + grid + } + } + ) +) diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/agghoo.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/agghoo.R new file mode 100644 index 0000000..48ac741 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/agghoo.R @@ -0,0 +1,58 @@ +#' agghoo +#' +#' Run the (core) agghoo procedure. +#' Arguments specify the list of models, their parameters and the +#' cross-validation settings, among others. +#' +#' @param data Data frame or matrix containing the data in lines. +#' @param target The target values to predict. Generally a vector, +#' but possibly a matrix in the case of "soft classification". +#' @param task "classification" or "regression". Default: +#' regression if target is numerical, classification otherwise. +#' @param gmodel A "generic model", which is a function returning a predict +#' function (taking X as only argument) from the tuple +#' (dataHO, targetHO, param), where 'HO' stands for 'Hold-Out', +#' referring to cross-validation. Cross-validation is run on an array +#' of 'param's. See params argument. Default: see R6::Model. +#' @param params A list of parameters. Often, one list cell is just a +#' numerical value, but in general it could be of any type. +#' Default: see R6::Model. +#' @param loss A function assessing the error of a prediction. +#' Arguments are y1 and y2 (comparing a prediction to known values). +#' loss(y1, y2) --> real number (error). Default: see R6::AgghooCV. +#' +#' @return +#' An R6::AgghooCV object o. Then, call o$fit() and finally o$predict(newData) +#' +#' @examples +#' # Regression: +#' a_reg <- agghoo(iris[,-c(2,5)], iris[,2]) +#' a_reg$fit() +#' pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1)) +#' # Classification +#' a_cla <- agghoo(iris[,-5], iris[,5]) +#' a_cla$fit() +#' pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1)) +#' +#' @seealso Function \code{\link{compareTo}} +#' +#' @references +#' Guillaume Maillard, Sylvain Arlot, Matthieu Lerasle. "Aggregated hold-out". +#' Journal of Machine Learning Research 22(20):1--55, 2021. +#' +#' @export +agghoo <- function( + data, target, task = NULL, gmodel = NULL, params = NULL, loss = NULL +) { + # Args check: + checkDaTa(data, target) + task <- checkTask(task, target) + modPar <- checkModPar(gmodel, params) + loss <- checkLoss(loss, task) + + # Build Model object (= list of parameterized models) + model <- Model$new(data, target, task, modPar$gmodel, modPar$params) + + # Return AgghooCV object, to run and predict + AgghooCV$new(data, target, task, model, loss) +} diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/checks.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/checks.R new file mode 100644 index 0000000..a19d55f --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/checks.R @@ -0,0 +1,102 @@ +# Internal usage: check and fill arguments with default values. + +defaultLoss_classif <- function(y1, y2) { + if (is.null(dim(y1))) + # Standard case: "hard" classification + mean(y1 != y2) + else { + # "Soft" classification: predict() outputs a probability matrix + # In this case "target" could be in matrix form. + if (!is.null(dim(y2))) + mean(rowSums(abs(y1 - y2))) + else { + # Or not: y2 is a "factor". + y2 <- as.character(y2) + # NOTE: the user should provide target in matrix form because + # matching y2 with columns is rather inefficient! + names <- colnames(y1) + positions <- list() + for (idx in seq_along(names)) + positions[[ names[idx] ]] <- idx + mean(vapply( + seq_along(y2), + function(idx) sum(abs(y1[idx,] - positions[[ y2[idx] ]])), + 0)) + } + } +} + +defaultLoss_regress <- function(y1, y2) { + mean(abs(y1 - y2)) +} + +# TODO: allow strings like "MSE", "abs" etc +checkLoss <- function(loss, task) { + if (!is.null(loss) && !is.function(loss)) + stop("loss: function(y1, y2) --> Real") + if (is.null(loss)) { + loss <- if (task == "classification") { + defaultLoss_classif + } else { + defaultLoss_regress + } + } + loss +} + +checkCV <- function(CV) { + if (is.null(CV)) + CV <- list(type="MC", V=10, test_size=0.2, shuffle=TRUE) + else { + if (!is.list(CV)) + stop("CV: list of type('MC'|'vfold'), V(integer, [test_size, shuffle]") + if (is.null(CV$type)) { + warning("CV$type not provided: set to MC") + CV$type <- "MC" + } + if (is.null(CV$V)) { + warning("CV$V not provided: set to 10") + CV$V <- 10 + } + if (CV$type == "MC" && is.null(CV$test_size)) + CV$test_size <- 0.2 + if (CV$type == "vfold" && is.null(CV$shuffle)) + CV$shuffle <- TRUE + } + CV +} + +checkDaTa <- function(data, target) { + if (!is.data.frame(data) && !is.matrix(data)) + stop("data: data.frame or matrix") + if (is.data.frame(target) || is.matrix(target)) { + if (!is.numeric(target)) + stop("multi-columns target must be a probability matrix") + if (nrow(target) != nrow(data) || ncol(target) == 1) + stop("target probability matrix does not match data size") + } + else if (!is.numeric(target) && !is.factor(target) && !is.character(target)) + stop("target: numeric, factor or character vector") +} + +checkTask <- function(task, target) { + if (!is.null(task)) + task <- match.arg(task, c("classification", "regression")) + ifelse(is.numeric(target), "regression", "classification") +} + +checkModPar <- function(gmodel, params) { + if (is.character(gmodel)) + gmodel <- match.arg(gmodel, c("knn", "ppr", "rf", "tree")) + else if (!is.null(gmodel) && !is.function(gmodel)) + stop("gmodel: function(dataHO, targetHO, param) --> function(X) --> y") + if (is.numeric(params) || is.character(params)) + params <- as.list(params) + if (!is.list(params) && !is.null(params)) + stop("params: numerical, character, or list (passed to model)") + if (is.function(gmodel) && !is.list(params)) + stop("params must be provided when using a custom model") + if (is.list(params) && is.null(gmodel)) + stop("model (or family) must be provided when using custom params") + list(gmodel=gmodel, params=params) +} diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R new file mode 100644 index 0000000..fe5b24d --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/compareTo.R @@ -0,0 +1,247 @@ +#' standardCV_core +#' +#' Cross-validation method, added here as an example. +#' Parameters are described in ?agghoo and ?AgghooCV +standardCV_core <- function(data, target, task, gmodel, params, loss, CV) { + n <- nrow(data) + shuffle_inds <- NULL + if (CV$type == "vfold" && CV$shuffle) + shuffle_inds <- sample(n, n) + list_testinds <- list() + for (v in seq_len(CV$V)) + list_testinds[[v]] <- get_testIndices(n, CV, v, shuffle_inds) + gmodel <- agghoo::Model$new(data, target, task, gmodel, params) + best_error <- Inf + best_p <- NULL + for (p in seq_len(gmodel$nmodels)) { + error <- Reduce('+', lapply(seq_len(CV$V), function(v) { + testIdx <- list_testinds[[v]] + d <- splitTrainTest(data, target, testIdx) + model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p) + prediction <- model_pred(d$dataTest) + loss(prediction, d$targetTest) + }) ) + if (error <= best_error) { + if (error == best_error) + best_p[[length(best_p)+1]] <- p + else { + best_p <- list(p) + best_error <- error + } + } + } + chosenP <- best_p[[ sample(length(best_p), 1) ]] + list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP)) +} + +#' CVvoting_core +#' +#' "voting" cross-validation method, added here as an example. +#' Parameters are described in ?agghoo and ?AgghooCV +CVvoting_core <- function(data, target, task, gmodel, params, loss, CV) { + CV <- checkCV(CV) + n <- nrow(data) + shuffle_inds <- NULL + if (CV$type == "vfold" && CV$shuffle) + shuffle_inds <- sample(n, n) + gmodel <- agghoo::Model$new(data, target, task, gmodel, params) + bestP <- rep(0, gmodel$nmodels) + for (v in seq_len(CV$V)) { + test_indices <- get_testIndices(n, CV, v, shuffle_inds) + d <- splitTrainTest(data, target, test_indices) + best_p <- NULL + best_error <- Inf + for (p in seq_len(gmodel$nmodels)) { + model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p) + prediction <- model_pred(d$dataTest) + error <- loss(prediction, d$targetTest) + if (error <= best_error) { + if (error == best_error) + best_p[[length(best_p)+1]] <- p + else { + best_p <- list(p) + best_error <- error + } + } + } + for (p in best_p) + bestP[p] <- bestP[p] + 1 + } + # Choose a param at random in case of ex-aequos: + maxP <- max(bestP) + chosenP <- sample(which(bestP == maxP), 1) + list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP)) +} + +#' standardCV_run +#' +#' Run and eval the standard cross-validation procedure. +#' Parameters are rather explicit except "floss", which corresponds to the +#' "final" loss function, applied to compute the error on testing dataset. +standardCV_run <- function( + dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... +) { + args <- list(...) + task <- checkTask(args$task, targetTrain) + modPar <- checkModPar(args$gmodel, args$params) + loss <- checkLoss(args$loss, task) + CV <- checkCV(args$CV) + s <- standardCV_core( + dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV) + if (verbose) + print(paste( "Parameter:", s$param )) + p <- s$model(dataTest) + err <- floss(p, targetTest) + if (verbose) + print(paste("error CV:", err)) + invisible(err) +} + +#' CVvoting_run +#' +#' Run and eval the voting cross-validation procedure. +#' Parameters are rather explicit except "floss", which corresponds to the +#' "final" loss function, applied to compute the error on testing dataset. +CVvoting_run <- function( + dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... +) { + args <- list(...) + task <- checkTask(args$task, targetTrain) + modPar <- checkModPar(args$gmodel, args$params) + loss <- checkLoss(args$loss, task) + CV <- checkCV(args$CV) + s <- CVvoting_core( + dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV) + if (verbose) + print(paste( "Parameter:", s$param )) + p <- s$model(dataTest) + err <- floss(p, targetTest) + if (verbose) + print(paste("error CV:", err)) + invisible(err) +} + +#' agghoo_run +#' +#' Run and eval the agghoo procedure. +#' Parameters are rather explicit except "floss", which corresponds to the +#' "final" loss function, applied to compute the error on testing dataset. +agghoo_run <- function( + dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ... +) { + args <- list(...) + CV <- checkCV(args$CV) + # Must remove CV arg, or agghoo will complain "error: unused arg" + args$CV <- NULL + a <- do.call(agghoo, c(list(data=dataTrain, target=targetTrain), args)) + a$fit(CV) + if (verbose) { + print("Parameters:") + print(unlist(a$getParams())) + } + pa <- a$predict(dataTest) + err <- floss(pa, targetTest) + if (verbose) + print(paste("error agghoo:", err)) + invisible(err) +} + +#' compareTo +#' +#' Compare a list of learning methods (or run only one), on data/target. +#' +#' @param data Data matrix or data.frame +#' @param target Target vector (generally) +#' @param method_s Either a single function, or a list +#' (examples: agghoo_run, standardCV_run) +#' @param rseed Seed of the random generator (-1 means "random seed") +#' @param floss Loss function to compute the error on testing dataset. +#' @param verbose TRUE to request methods to be verbose. +#' @param ... arguments passed to method_s function(s) +#' +#' @export +compareTo <- function( + data, target, method_s, rseed=-1, floss=NULL, verbose=TRUE, ... +) { + if (rseed >= 0) + set.seed(rseed) + n <- nrow(data) + test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) ) + d <- splitTrainTest(data, target, test_indices) + + # Set error function to be used on model outputs (not in core method) + task <- checkTask(list(...)$task, target) + if (is.null(floss)) { + floss <- function(y1, y2) { + ifelse(task == "classification", mean(y1 != y2), mean(abs(y1 - y2))) + } + } + + # Run (and compare) all methods: + runOne <- function(o) { + o(d$dataTrain, d$dataTest, d$targetTrain, d$targetTest, floss, verbose, ...) + } + errors <- c() + if (is.list(method_s)) + errors <- sapply(method_s, runOne) + else if (is.function(method_s)) + errors <- runOne(method_s) + invisible(errors) +} + +#' compareMulti +#' +#' Run compareTo N times in parallel. +#' +#' @inheritParams compareTo +#' @param N Number of calls to method(s) +#' @param nc Number of cores. Set to parallel::detectCores() if undefined. +#' Set it to any value <=1 to say "no parallelism". +#' @param verbose TRUE to print task numbers and "Errors:" in the end. +#' +#' @export +compareMulti <- function( + data, target, method_s, N=100, nc=NA, floss=NULL, verbose=TRUE, ... +) { + base::require(parallel) + if (is.na(nc)) + nc <- parallel::detectCores() + + # "One" comparison for each method in method_s (list) + compareOne <- function(n) { + if (verbose) + print(n) + compareTo(data, target, method_s, n, floss, verbose=FALSE, ...) + } + + errors <- if (nc >= 2) { + parallel::mclapply(1:N, compareOne, mc.cores = nc) + } else { + lapply(1:N, compareOne) + } + if (verbose) + print("Errors:") + Reduce('+', errors) / N +} + +#' compareRange +#' +#' Run compareMulti on several values of the parameter V. +#' +#' @inheritParams compareMulti +#' @param V_range Values of V to be tested. +#' +#' @export +compareRange <- function( + data, target, method_s, N=100, nc=NA, floss=NULL, V_range=c(10,15,20), ... +) { + args <- list(...) + # Avoid warnings if V is left unspecified: + CV <- suppressWarnings( checkCV(args$CV) ) + errors <- lapply(V_range, function(V) { + args$CV$V <- V + do.call(compareMulti, c(list(data=data, target=target, method_s=method_s, + N=N, nc=nc, floss=floss, verbose=F), args)) + }) + print(paste(V_range, errors)) +} diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/utils.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/utils.R new file mode 100644 index 0000000..823b123 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/utils.R @@ -0,0 +1,30 @@ +# Helper for cross-validation: return the next test indices. +get_testIndices <- function(n, CV, v, shuffle_inds) { + if (CV$type == "vfold") { + # Slice indices (optionnally shuffled) + first_index = round((v-1) * n / CV$V) + 1 + last_index = round(v * n / CV$V) + test_indices = first_index:last_index + if (!is.null(shuffle_inds)) + test_indices <- shuffle_inds[test_indices] + } + else + # Monte-Carlo cross-validation + test_indices = sample(n, round(n * CV$test_size)) + test_indices +} + +# Helper which split data into training and testing parts. +splitTrainTest <- function(data, target, testIdx) { + dataTrain <- data[-testIdx,] + targetTrain <- target[-testIdx] + dataTest <- data[testIdx,] + targetTest <- target[testIdx] + # [HACK] R will cast 1-dim matrices into vectors: + if (!is.matrix(dataTrain) && !is.data.frame(dataTrain)) + dataTrain <- as.matrix(dataTrain) + if (!is.matrix(dataTest) && !is.data.frame(dataTest)) + dataTest <- as.matrix(dataTest) + list(dataTrain=dataTrain, targetTrain=targetTrain, + dataTest=dataTest, targetTest=targetTest) +} diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/README.md b/agghoo.Rcheck/00_pkg_src/agghoo/README.md new file mode 100644 index 0000000..337abcb --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/README.md @@ -0,0 +1,15 @@ +# agghoo + +R package for model selection based on aggregation. +Alternative to standard cross-validation. + +## Install the package + +From GitHub: `devtools::install_github("yagu0/agghoo")` + +Locally, in a terminal: `R CMD INSTALL .` + +## Use the package + + library(agghoo) + ?agghoo diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/TODO b/agghoo.Rcheck/00_pkg_src/agghoo/TODO new file mode 100644 index 0000000..f197d8a --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/TODO @@ -0,0 +1,2 @@ +Support des valeurs manquantes (cf. mlbench::Ozone dataset) +Méthode pour données mixtes ? (que tree actuellement) diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/example/example.R b/agghoo.Rcheck/00_pkg_src/agghoo/example/example.R new file mode 100644 index 0000000..7fae2ce --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/example/example.R @@ -0,0 +1,43 @@ +library(agghoo) + +data(iris) #already there +library(mlbench) +data(PimaIndiansDiabetes) + +# Run only agghoo on iris dataset (split into train/test, etc). +# Default parameters: see ?agghoo and ?AgghooCV +compareTo(iris[,-5], iris[,5], agghoo_run) + +# Run both agghoo and standard CV, specifiying some parameters. +compareTo(iris[,-5], iris[,5], list(agghoo_run, standardCV_run), gmodel="tree") +compareTo(iris[,-5], iris[,5], list(agghoo_run, standardCV_run), + gmodel="knn", params=c(3, 7, 13, 17, 23, 31), + CV = list(type="vfold", V=5, shuffle=T)) + +# Run both agghoo and standard CV, averaging errors over N=10 runs +# (possible for a single method but wouldn't make much sense...). +compareMulti(PimaIndiansDiabetes[,-9], PimaIndiansDiabetes[,9], + list(agghoo_run, standardCV_run), N=10, gmodel="rf") + +# Compare several values of V +compareRange(PimaIndiansDiabetes[,-9], PimaIndiansDiabetes[,9], + list(agghoo_run, standardCV_run), N=10, V_range=c(10, 20, 30)) + +# For example to use average of squared differences. +# Default is "mean(abs(y1 - y2))". +loss2 <- function(y1, y2) mean((y1 - y2)^2) + +# In regression on artificial datasets (TODO: real data?) +data <- mlbench.twonorm(300, 3)$x +target <- rowSums(data) +compareMulti(data, target, list(agghoo_run, standardCV_run), + N=10, gmodel="tree", params=c(1, 3, 5, 7, 9), loss=loss2, + CV = list(type="MC", V=12, test_size=0.3)) + +compareMulti(data, target, list(agghoo_run, standardCV_run), + N=10, floss=loss2, CV = list(type="vfold", V=10, shuffle=F)) + +# Random tests to check that method doesn't fail in 1D case +M <- matrix(rnorm(200), ncol=2) +compareTo(as.matrix(M[,-2]), M[,2], list(agghoo_run, standardCV_run), gmodel="knn") +compareTo(as.matrix(M[,-2]), M[,2], list(agghoo_run, standardCV_run), gmodel="tree") diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/test/TODO b/agghoo.Rcheck/00_pkg_src/agghoo/test/TODO new file mode 100644 index 0000000..50acca1 --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/test/TODO @@ -0,0 +1 @@ +Some unit tests? diff --git a/agghoo.Rcheck/00check.log b/agghoo.Rcheck/00check.log new file mode 100644 index 0000000..684daae --- /dev/null +++ b/agghoo.Rcheck/00check.log @@ -0,0 +1,52 @@ +* using log directory ‘/home/auder/repos/agghoo/agghoo.Rcheck’ +* using R version 4.2.1 (2022-06-23) +* using platform: x86_64-pc-linux-gnu (64-bit) +* using session charset: UTF-8 +* checking for file ‘agghoo/DESCRIPTION’ ... OK +* this is package ‘agghoo’ version ‘0.1-0’ +* checking package namespace information ... OK +* checking package dependencies ... OK +* checking if this is a source package ... OK +* checking if there is a namespace ... OK +* checking for executable files ... OK +* checking for hidden files and directories ... OK +* checking for portable file names ... OK +* checking for sufficient/correct file permissions ... OK +* checking whether package ‘agghoo’ can be installed ... OK +* checking installed package size ... OK +* checking package directory ... OK +* checking DESCRIPTION meta-information ... OK +* checking top-level files ... OK +* checking for left-over files ... OK +* checking index information ... OK +* checking package subdirectories ... OK +* checking R files for non-ASCII characters ... OK +* checking R files for syntax errors ... OK +* checking whether the package can be loaded ... OK +* checking whether the package can be loaded with stated dependencies ... OK +* checking whether the package can be unloaded cleanly ... OK +* checking whether the namespace can be loaded with stated dependencies ... OK +* checking whether the namespace can be unloaded cleanly ... OK +* checking loading without being on the library search path ... OK +* checking dependencies in R code ... OK +* checking S3 generic/method consistency ... OK +* checking replacement functions ... OK +* checking foreign function calls ... OK +* checking R code for possible problems ... NOTE +compareMulti: no visible binding for global variable ‘parallel’ +Undefined global functions or variables: + parallel +* checking for missing documentation entries ... WARNING +Undocumented code objects: + ‘AgghooCV’ ‘Model’ ‘agghoo’ ‘compareMulti’ ‘compareRange’ ‘compareTo’ +All user-level objects in a package should have documentation entries. +See chapter ‘Writing R documentation files’ in the ‘Writing R +Extensions’ manual. +* checking examples ... NONE +* checking PDF version of manual ... WARNING +LaTeX errors when creating PDF version. +This typically indicates Rd problems. +* checking PDF version of manual without index ... ERROR +Re-running with no redirection of stdout/stderr. +* DONE +Status: 1 ERROR, 2 WARNINGs, 1 NOTE diff --git a/agghoo.Rcheck/00install.out b/agghoo.Rcheck/00install.out new file mode 100644 index 0000000..4ec7d20 --- /dev/null +++ b/agghoo.Rcheck/00install.out @@ -0,0 +1,12 @@ +* installing *source* package ‘agghoo’ ... +** using staged installation +** R +** byte-compile and prepare package for lazy loading +** help +No man pages found in package ‘agghoo’ +*** installing help indices +** building package indices +** testing if installed package can be loaded from temporary location +** testing if installed package can be loaded from final location +** testing if installed package keeps a record of temporary installation path +* DONE (agghoo) diff --git a/agghoo.Rcheck/Rdlatex.log b/agghoo.Rcheck/Rdlatex.log new file mode 100644 index 0000000..ed3d8b1 --- /dev/null +++ b/agghoo.Rcheck/Rdlatex.log @@ -0,0 +1,22 @@ +Hmm ... looks like a package +Converting parsed Rd's to LaTeX Creating pdf output from LaTeX ... +warning: kpathsea: configuration file texmf.cnf not found in these directories: /usr/bin:/usr/bin/share/texmf-local/web2c:/usr/bin/share/texmf-dist/web2c:/usr/bin/share/texmf/web2c:/usr/bin/texmf-local/web2c:/usr/bin/texmf-dist/web2c:/usr/bin/texmf/web2c:/usr:/usr/share/texmf-local/web2c:/usr/share/texmf-dist/web2c:/usr/share/texmf/web2c:/usr/texmf-local/web2c:/usr/texmf-dist/web2c:/usr/texmf/web2c://texmf-local/web2c:/://share/texmf-local/web2c://share/texmf-dist/web2c://share/texmf/web2c://texmf-local/web2c://texmf-dist/web2c://texmf/web2c. +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex) + +kpathsea: Running mktexfmt pdflatex.fmt +mktexfmt: No such file or directory +I can't find the format file `pdflatex.fmt'! +Warning in file(con, "r") : + cannot open file 'Rd2.log': No such file or directory +Error in file(con, "r") : cannot open the connection +warning: kpathsea: configuration file texmf.cnf not found in these directories: /usr/bin:/usr/bin/share/texmf-local/web2c:/usr/bin/share/texmf-dist/web2c:/usr/bin/share/texmf/web2c:/usr/bin/texmf-local/web2c:/usr/bin/texmf-dist/web2c:/usr/bin/texmf/web2c:/usr:/usr/share/texmf-local/web2c:/usr/share/texmf-dist/web2c:/usr/share/texmf/web2c:/usr/texmf-local/web2c:/usr/texmf-dist/web2c:/usr/texmf/web2c://texmf-local/web2c:/://share/texmf-local/web2c://share/texmf-dist/web2c://share/texmf/web2c://texmf-local/web2c://texmf-dist/web2c://texmf/web2c. +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex) + +kpathsea: Running mktexfmt pdflatex.fmt +mktexfmt: No such file or directory +I can't find the format file `pdflatex.fmt'! +Warning in file(con, "r") : + cannot open file 'Rd2.log': No such file or directory +Error in file(con, "r") : cannot open the connection +Error in running tools::texi2pdf() +You may want to clean up by 'rm -Rf /tmp/RtmpIZpCnq/Rd2pdf1084ce20004' diff --git a/agghoo.Rcheck/agghoo-manual.tex b/agghoo.Rcheck/agghoo-manual.tex new file mode 100644 index 0000000..8a561f0 --- /dev/null +++ b/agghoo.Rcheck/agghoo-manual.tex @@ -0,0 +1,44 @@ +\nonstopmode{} +\documentclass[letterpaper]{book} +\usepackage[times,hyper]{Rd} +\usepackage{makeidx} +\usepackage[utf8]{inputenc} % @SET ENCODING@ +% \usepackage{graphicx} % @USE GRAPHICX@ +\makeindex{} +\begin{document} +\chapter*{} +\begin{center} +{\textbf{\huge Package `agghoo'}} +\par\bigskip{\large \today} +\end{center} +\ifthenelse{\boolean{Rd@use@hyper}}{\hypersetup{pdftitle = {agghoo: Aggregated Hold-Out Cross Validation}}}{} +\begin{description} +\raggedright{} +\item[Title]\AsIs{Aggregated Hold-Out Cross Validation} +\item[Date]\AsIs{2022-08-30} +\item[Version]\AsIs{0.1-0} +\item[Description]\AsIs{The 'agghoo' procedure is an alternative to usual cross-validation. +Instead of choosing the best model trained on V subsamples, it determines +a winner model for each subsample, and then aggregate the V outputs. +For the details, see ``Aggregated hold-out'' by Guillaume Maillard, +Sylvain Arlot, Matthieu Lerasle (2021) <}\Rhref{https://arxiv.org/abs/1909.04890}{arXiv:1909.04890}\AsIs{> +published in Journal of Machine Learning Research 22(20):1--55.} +\item[Author]\AsIs{Sylvain Arlot }\email{sylvain.arlot@universite-paris-saclay.fr}\AsIs{ [cph,ctb], +Benjamin Auder }\email{benjamin.auder@universite-paris-saclay.fr}\AsIs{ [aut,cre,cph], +Melina Gallopin }\email{melina.gallopin@universite-paris-saclay.fr}\AsIs{ [cph,ctb], +Matthieu Lerasle }\email{matthieu.lerasle@universite-paris-saclay.fr}\AsIs{ [cph,ctb], +Guillaume Maillard }\email{guillaume.maillard@uni.lu}\AsIs{ [cph,ctb]} +\item[Maintainer]\AsIs{Benjamin Auder }\email{benjamin.auder@universite-paris-saclay.fr}\AsIs{} +\item[Depends]\AsIs{R (>= 3.5.0)} +\item[Imports]\AsIs{class, parallel, R6, rpart, FNN} +\item[Suggests]\AsIs{roxygen2} +\item[URL]\AsIs{}\url{https://git.auder.net/?p=agghoo.git}\AsIs{} +\item[License]\AsIs{MIT + file LICENSE} +\item[RoxygenNote]\AsIs{7.2.1} +\item[Collate]\AsIs{'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R' +'checks.R' 'utils.R' 'A_NAMESPACE.R'} +\item[NeedsCompilation]\AsIs{no} +\end{description} +\Rdcontents{\R{} topics documented:} +\printindex{} +\end{document} diff --git a/agghoo.Rcheck/agghoo/DESCRIPTION b/agghoo.Rcheck/agghoo/DESCRIPTION new file mode 100644 index 0000000..cb86199 --- /dev/null +++ b/agghoo.Rcheck/agghoo/DESCRIPTION @@ -0,0 +1,27 @@ +Package: agghoo +Title: Aggregated Hold-Out Cross Validation +Date: 2022-08-30 +Version: 0.1-0 +Description: The 'agghoo' procedure is an alternative to usual cross-validation. + Instead of choosing the best model trained on V subsamples, it determines + a winner model for each subsample, and then aggregate the V outputs. + For the details, see "Aggregated hold-out" by Guillaume Maillard, + Sylvain Arlot, Matthieu Lerasle (2021) + published in Journal of Machine Learning Research 22(20):1--55. +Author: Sylvain Arlot [cph,ctb], + Benjamin Auder [aut,cre,cph], + Melina Gallopin [cph,ctb], + Matthieu Lerasle [cph,ctb], + Guillaume Maillard [cph,ctb] +Maintainer: Benjamin Auder +Depends: R (>= 3.5.0) +Imports: class, parallel, R6, rpart, FNN +Suggests: roxygen2 +URL: https://git.auder.net/?p=agghoo.git +License: MIT + file LICENSE +RoxygenNote: 7.2.1 +Collate: 'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R' + 'checks.R' 'utils.R' 'A_NAMESPACE.R' +NeedsCompilation: no +Packaged: 2022-09-09 15:45:56 UTC; auder +Built: R 4.2.1; ; 2022-09-09 15:46:05 UTC; unix diff --git a/agghoo.Rcheck/agghoo/LICENSE b/agghoo.Rcheck/agghoo/LICENSE new file mode 100644 index 0000000..094ff81 --- /dev/null +++ b/agghoo.Rcheck/agghoo/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021-2022 +COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard diff --git a/agghoo.Rcheck/agghoo/Meta/Rd.rds b/agghoo.Rcheck/agghoo/Meta/Rd.rds new file mode 100644 index 0000000..f7bb5f4 Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/Rd.rds differ diff --git a/agghoo.Rcheck/agghoo/Meta/features.rds b/agghoo.Rcheck/agghoo/Meta/features.rds new file mode 100644 index 0000000..3dc8fb5 Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/features.rds differ diff --git a/agghoo.Rcheck/agghoo/Meta/hsearch.rds b/agghoo.Rcheck/agghoo/Meta/hsearch.rds new file mode 100644 index 0000000..051a2b7 Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/hsearch.rds differ diff --git a/agghoo.Rcheck/agghoo/Meta/links.rds b/agghoo.Rcheck/agghoo/Meta/links.rds new file mode 100644 index 0000000..ba5b13a Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/links.rds differ diff --git a/agghoo.Rcheck/agghoo/Meta/nsInfo.rds b/agghoo.Rcheck/agghoo/Meta/nsInfo.rds new file mode 100644 index 0000000..ca0be5e Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/nsInfo.rds differ diff --git a/agghoo.Rcheck/agghoo/Meta/package.rds b/agghoo.Rcheck/agghoo/Meta/package.rds new file mode 100644 index 0000000..138494f Binary files /dev/null and b/agghoo.Rcheck/agghoo/Meta/package.rds differ diff --git a/agghoo.Rcheck/agghoo/NAMESPACE b/agghoo.Rcheck/agghoo/NAMESPACE new file mode 100644 index 0000000..7bbddef --- /dev/null +++ b/agghoo.Rcheck/agghoo/NAMESPACE @@ -0,0 +1,13 @@ +# Generated by roxygen2: do not edit by hand + +export(AgghooCV) +export(Model) +export(agghoo) +export(compareMulti) +export(compareRange) +export(compareTo) +importFrom(FNN,knn.reg) +importFrom(R6,R6Class) +importFrom(class,knn) +importFrom(rpart,rpart) +importFrom(stats,ppr) diff --git a/agghoo.Rcheck/agghoo/R/agghoo b/agghoo.Rcheck/agghoo/R/agghoo new file mode 100644 index 0000000..6686156 --- /dev/null +++ b/agghoo.Rcheck/agghoo/R/agghoo @@ -0,0 +1,27 @@ +# File share/R/nspackloader.R +# Part of the R package, https://www.R-project.org +# +# Copyright (C) 1995-2012 The R Core Team +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# A copy of the GNU General Public License is available at +# https://www.r-project.org/Licenses/ + +local({ + info <- loadingNamespaceInfo() + pkg <- info$pkgname + ns <- .getNamespace(as.name(pkg)) + if (is.null(ns)) + stop("cannot find namespace environment for ", pkg, domain = NA); + dbbase <- file.path(info$libname, pkg, "R", pkg) + lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.") +}) diff --git a/agghoo.Rcheck/agghoo/R/agghoo.rdb b/agghoo.Rcheck/agghoo/R/agghoo.rdb new file mode 100644 index 0000000..4f8b251 Binary files /dev/null and b/agghoo.Rcheck/agghoo/R/agghoo.rdb differ diff --git a/agghoo.Rcheck/agghoo/R/agghoo.rdx b/agghoo.Rcheck/agghoo/R/agghoo.rdx new file mode 100644 index 0000000..2ce7cb7 Binary files /dev/null and b/agghoo.Rcheck/agghoo/R/agghoo.rdx differ diff --git a/agghoo.Rcheck/agghoo/help/AnIndex b/agghoo.Rcheck/agghoo/help/AnIndex new file mode 100644 index 0000000..e69de29 diff --git a/agghoo.Rcheck/agghoo/help/agghoo.rdb b/agghoo.Rcheck/agghoo/help/agghoo.rdb new file mode 100644 index 0000000..e69de29 diff --git a/agghoo.Rcheck/agghoo/help/agghoo.rdx b/agghoo.Rcheck/agghoo/help/agghoo.rdx new file mode 100644 index 0000000..c28f3f9 Binary files /dev/null and b/agghoo.Rcheck/agghoo/help/agghoo.rdx differ diff --git a/agghoo.Rcheck/agghoo/help/aliases.rds b/agghoo.Rcheck/agghoo/help/aliases.rds new file mode 100644 index 0000000..291dab0 Binary files /dev/null and b/agghoo.Rcheck/agghoo/help/aliases.rds differ diff --git a/agghoo.Rcheck/agghoo/help/paths.rds b/agghoo.Rcheck/agghoo/help/paths.rds new file mode 100644 index 0000000..3d2b25e Binary files /dev/null and b/agghoo.Rcheck/agghoo/help/paths.rds differ diff --git a/agghoo.Rcheck/agghoo/html/00Index.html b/agghoo.Rcheck/agghoo/html/00Index.html new file mode 100644 index 0000000..84eed59 --- /dev/null +++ b/agghoo.Rcheck/agghoo/html/00Index.html @@ -0,0 +1,24 @@ + + +R: Aggregated Hold-Out Cross Validation + + + +
+

Aggregated Hold-Out Cross Validation + +

+
+
+[Up] +[Top] +

Documentation for package ‘agghoo’ version 0.1-0

+ + + +

Help Pages

+ + +There are no help pages in this package +
diff --git a/agghoo.Rcheck/agghoo/html/R.css b/agghoo.Rcheck/agghoo/html/R.css new file mode 100644 index 0000000..2ef6cd6 --- /dev/null +++ b/agghoo.Rcheck/agghoo/html/R.css @@ -0,0 +1,120 @@ +@media screen { + .container { + padding-right: 10px; + padding-left: 10px; + margin-right: auto; + margin-left: auto; + max-width: 900px; + } +} + +.rimage img { /* from knitr - for examples and demos */ + width: 96%; + margin-left: 2%; +} + +.katex { font-size: 1.1em; } + +code { + color: inherit; + background: inherit; +} + +body { + line-height: 1.4; + background: white; + color: black; +} + +a:link { + background: white; + color: blue; +} + +a:visited { + background: white; + color: rgb(50%, 0%, 50%); +} + +h1 { + background: white; + color: rgb(55%, 55%, 55%); + font-family: monospace; + font-size: 1.4em; /* x-large; */ + text-align: center; +} + +h2 { + background: white; + color: rgb(40%, 40%, 40%); + font-family: monospace; + font-size: 1.2em; /* large; */ + text-align: center; +} + +h3 { + background: white; + color: rgb(40%, 40%, 40%); + font-family: monospace; + font-size: 1.2em; /* large; */ +} + +h4 { + background: white; + color: rgb(40%, 40%, 40%); + font-family: monospace; + font-style: italic; + font-size: 1.2em; /* large; */ +} + +h5 { + background: white; + color: rgb(40%, 40%, 40%); + font-family: monospace; +} + +h6 { + background: white; + color: rgb(40%, 40%, 40%); + font-family: monospace; + font-style: italic; +} + +img.toplogo { + width: 4em; + vertical-align: middle; +} + +img.arrow { + width: 30px; + height: 30px; + border: 0; +} + +span.acronym { + font-size: small; +} + +span.env { + font-family: monospace; +} + +span.file { + font-family: monospace; +} + +span.option{ + font-family: monospace; +} + +span.pkg { + font-weight: bold; +} + +span.samp{ + font-family: monospace; +} + +div.vignettes a:hover { + background: rgb(85%, 85%, 85%); +} diff --git a/agghoo_0.1-0.tar.gz b/agghoo_0.1-0.tar.gz new file mode 100644 index 0000000..719e7e9 Binary files /dev/null and b/agghoo_0.1-0.tar.gz differ diff --git a/man/AgghooCV.Rd b/man/AgghooCV.Rd deleted file mode 100644 index 97d4c41..0000000 --- a/man/AgghooCV.Rd +++ /dev/null @@ -1,116 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6_AgghooCV.R -\name{AgghooCV} -\alias{AgghooCV} -\title{R6 class with agghoo functions fit() and predict().} -\description{ -Class encapsulating the methods to run to obtain the best predictor -from the list of models (see 'Model' class). -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-new}{\code{AgghooCV$new()}} -\item \href{#method-fit}{\code{AgghooCV$fit()}} -\item \href{#method-predict}{\code{AgghooCV$predict()}} -\item \href{#method-getParams}{\code{AgghooCV$getParams()}} -\item \href{#method-clone}{\code{AgghooCV$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-new}{}}} -\subsection{Method \code{new()}}{ -Create a new AgghooCV object. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{AgghooCV$new(data, target, task, gmodel, loss)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{data}}{Matrix or data.frame} - -\item{\code{target}}{Vector of targets (generally numeric or factor)} - -\item{\code{task}}{"regression" or "classification". -Default: classification if target not numeric.} - -\item{\code{gmodel}}{Generic model returning a predictive function -Default: tree if mixed data, knn/ppr otherwise.} - -\item{\code{loss}}{Function assessing the error of a prediction -Default: error rate or mean(abs(error)).} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-fit}{}}} -\subsection{Method \code{fit()}}{ -Fit an agghoo model. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{AgghooCV$fit(CV = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{CV}}{List describing cross-validation to run. Slots: \cr - - type: 'vfold' or 'MC' for Monte-Carlo (default: MC) \cr - - V: number of runs (default: 10) \cr - - test_size: percentage of data in the test dataset, for MC - (irrelevant for V-fold). Default: 0.2. \cr - - shuffle: wether or not to shuffle data before V-fold. - Irrelevant for Monte-Carlo; default: TRUE \cr -Default (if NULL): type="MC", V=10, test_size=0.2} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-predict}{}}} -\subsection{Method \code{predict()}}{ -Predict an agghoo model (after calling fit()) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{AgghooCV$predict(X)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{X}}{Matrix or data.frame to predict} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-getParams}{}}} -\subsection{Method \code{getParams()}}{ -Return the list of V best parameters (after calling fit()) -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{AgghooCV$getParams()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{AgghooCV$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/CVvoting_core.Rd b/man/CVvoting_core.Rd deleted file mode 100644 index 2de00e0..0000000 --- a/man/CVvoting_core.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{CVvoting_core} -\alias{CVvoting_core} -\title{CVvoting_core} -\usage{ -CVvoting_core(data, target, task, gmodel, params, loss, CV) -} -\description{ -"voting" cross-validation method, added here as an example. -Parameters are described in ?agghoo and ?AgghooCV -} diff --git a/man/CVvoting_run.Rd b/man/CVvoting_run.Rd deleted file mode 100644 index 9aad2fe..0000000 --- a/man/CVvoting_run.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{CVvoting_run} -\alias{CVvoting_run} -\title{CVvoting_run} -\usage{ -CVvoting_run(dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...) -} -\description{ -Run and eval the voting cross-validation procedure. -Parameters are rather explicit except "floss", which corresponds to the -"final" loss function, applied to compute the error on testing dataset. -} diff --git a/man/Model.Rd b/man/Model.Rd deleted file mode 100644 index 0e52101..0000000 --- a/man/Model.Rd +++ /dev/null @@ -1,109 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/R6_Model.R -\name{Model} -\alias{Model} -\title{R6 class representing a (generic) model.} -\description{ -"Model" class, containing a (generic) learning function, which from -data + target [+ params] returns a prediction function X --> y. -Parameters for cross-validation are either provided or estimated. -Model family can be chosen among "tree", "ppr" and "knn" for now. -} -\section{Public fields}{ -\if{html}{\out{
}} -\describe{ -\item{\code{nmodels}}{Number of parameters (= number of [predictive] models)} -} -\if{html}{\out{
}} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-new}{\code{Model$new()}} -\item \href{#method-get}{\code{Model$get()}} -\item \href{#method-getParam}{\code{Model$getParam()}} -\item \href{#method-clone}{\code{Model$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-new}{}}} -\subsection{Method \code{new()}}{ -Create a new generic model. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Model$new(data, target, task, gmodel = NULL, params = NULL)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{data}}{Matrix or data.frame} - -\item{\code{target}}{Vector of targets (generally numeric or factor)} - -\item{\code{task}}{"regression" or "classification"} - -\item{\code{gmodel}}{Generic model returning a predictive function; chosen -automatically given data and target nature if not provided.} - -\item{\code{params}}{List of parameters for cross-validation (each defining a model)} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-get}{}}} -\subsection{Method \code{get()}}{ -Returns the model at index "index", trained on dataHO/targetHO. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Model$get(dataHO, targetHO, index)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{dataHO}}{Matrix or data.frame} - -\item{\code{targetHO}}{Vector of targets (generally numeric or factor)} - -\item{\code{index}}{Index of the model in 1...nmodels} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-getParam}{}}} -\subsection{Method \code{getParam()}}{ -Returns the parameter at index "index". -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Model$getParam(index)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{index}}{Index of the model in 1...nmodels} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Model$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/man/agghoo.Rd b/man/agghoo.Rd deleted file mode 100644 index 38730eb..0000000 --- a/man/agghoo.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/agghoo.R -\name{agghoo} -\alias{agghoo} -\title{agghoo} -\usage{ -agghoo(data, target, task = NULL, gmodel = NULL, params = NULL, loss = NULL) -} -\arguments{ -\item{data}{Data frame or matrix containing the data in lines.} - -\item{target}{The target values to predict. Generally a vector, -but possibly a matrix in the case of "soft classification".} - -\item{task}{"classification" or "regression". Default: -regression if target is numerical, classification otherwise.} - -\item{gmodel}{A "generic model", which is a function returning a predict -function (taking X as only argument) from the tuple -(dataHO, targetHO, param), where 'HO' stands for 'Hold-Out', -referring to cross-validation. Cross-validation is run on an array -of 'param's. See params argument. Default: see R6::Model.} - -\item{params}{A list of parameters. Often, one list cell is just a -numerical value, but in general it could be of any type. -Default: see R6::Model.} - -\item{loss}{A function assessing the error of a prediction. -Arguments are y1 and y2 (comparing a prediction to known values). -loss(y1, y2) --> real number (error). Default: see R6::AgghooCV.} -} -\value{ -An R6::AgghooCV object o. Then, call o$fit() and finally o$predict(newData) -} -\description{ -Run the (core) agghoo procedure. -Arguments specify the list of models, their parameters and the -cross-validation settings, among others. -} -\examples{ -# Regression: -a_reg <- agghoo(iris[,-c(2,5)], iris[,2]) -a_reg$fit() -pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1)) -# Classification -a_cla <- agghoo(iris[,-5], iris[,5]) -a_cla$fit() -pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1)) - -} -\references{ -Guillaume Maillard, Sylvain Arlot, Matthieu Lerasle. "Aggregated hold-out". -Journal of Machine Learning Research 22(20):1--55, 2021. -} -\seealso{ -Function \code{\link{compareTo}} -} diff --git a/man/agghoo_run.Rd b/man/agghoo_run.Rd deleted file mode 100644 index a4f565d..0000000 --- a/man/agghoo_run.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{agghoo_run} -\alias{agghoo_run} -\title{agghoo_run} -\usage{ -agghoo_run(dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...) -} -\description{ -Run and eval the agghoo procedure. -Parameters are rather explicit except "floss", which corresponds to the -"final" loss function, applied to compute the error on testing dataset. -} diff --git a/man/compareMulti.Rd b/man/compareMulti.Rd deleted file mode 100644 index 8bf537e..0000000 --- a/man/compareMulti.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{compareMulti} -\alias{compareMulti} -\title{compareMulti} -\usage{ -compareMulti( - data, - target, - method_s, - N = 100, - nc = NA, - floss = NULL, - verbose = TRUE, - ... -) -} -\arguments{ -\item{data}{Data matrix or data.frame} - -\item{target}{Target vector (generally)} - -\item{method_s}{Either a single function, or a list -(examples: agghoo_run, standardCV_run)} - -\item{N}{Number of calls to method(s)} - -\item{nc}{Number of cores. Set to parallel::detectCores() if undefined. -Set it to any value <=1 to say "no parallelism".} - -\item{floss}{Loss function to compute the error on testing dataset.} - -\item{verbose}{TRUE to print task numbers and "Errors:" in the end.} - -\item{...}{arguments passed to method_s function(s)} -} -\description{ -Run compareTo N times in parallel. -} diff --git a/man/compareRange.Rd b/man/compareRange.Rd deleted file mode 100644 index c884e43..0000000 --- a/man/compareRange.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{compareRange} -\alias{compareRange} -\title{compareRange} -\usage{ -compareRange( - data, - target, - method_s, - N = 100, - nc = NA, - floss = NULL, - V_range = c(10, 15, 20), - ... -) -} -\arguments{ -\item{data}{Data matrix or data.frame} - -\item{target}{Target vector (generally)} - -\item{method_s}{Either a single function, or a list -(examples: agghoo_run, standardCV_run)} - -\item{N}{Number of calls to method(s)} - -\item{nc}{Number of cores. Set to parallel::detectCores() if undefined. -Set it to any value <=1 to say "no parallelism".} - -\item{floss}{Loss function to compute the error on testing dataset.} - -\item{V_range}{Values of V to be tested.} - -\item{...}{arguments passed to method_s function(s)} -} -\description{ -Run compareMulti on several values of the parameter V. -} diff --git a/man/compareTo.Rd b/man/compareTo.Rd deleted file mode 100644 index d5c1ab4..0000000 --- a/man/compareTo.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{compareTo} -\alias{compareTo} -\title{compareTo} -\usage{ -compareTo( - data, - target, - method_s, - rseed = -1, - floss = NULL, - verbose = TRUE, - ... -) -} -\arguments{ -\item{data}{Data matrix or data.frame} - -\item{target}{Target vector (generally)} - -\item{method_s}{Either a single function, or a list -(examples: agghoo_run, standardCV_run)} - -\item{rseed}{Seed of the random generator (-1 means "random seed")} - -\item{floss}{Loss function to compute the error on testing dataset.} - -\item{verbose}{TRUE to request methods to be verbose.} - -\item{...}{arguments passed to method_s function(s)} -} -\description{ -Compare a list of learning methods (or run only one), on data/target. -} diff --git a/man/standardCV_core.Rd b/man/standardCV_core.Rd deleted file mode 100644 index 42ad88c..0000000 --- a/man/standardCV_core.Rd +++ /dev/null @@ -1,12 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{standardCV_core} -\alias{standardCV_core} -\title{standardCV_core} -\usage{ -standardCV_core(data, target, task, gmodel, params, loss, CV) -} -\description{ -Cross-validation method, added here as an example. -Parameters are described in ?agghoo and ?AgghooCV -} diff --git a/man/standardCV_run.Rd b/man/standardCV_run.Rd deleted file mode 100644 index 0937764..0000000 --- a/man/standardCV_run.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compareTo.R -\name{standardCV_run} -\alias{standardCV_run} -\title{standardCV_run} -\usage{ -standardCV_run( - dataTrain, - dataTest, - targetTrain, - targetTest, - floss, - verbose, - ... -) -} -\description{ -Run and eval the standard cross-validation procedure. -Parameters are rather explicit except "floss", which corresponds to the -"final" loss function, applied to compute the error on testing dataset. -}