From: Benjamin Auder Date: Sat, 5 Jun 2021 17:38:39 +0000 (+0200) Subject: First commit X-Git-Url: https://git.auder.net/js/img/app_dev.php/current/git-logo.png?a=commitdiff_plain;h=c5946158cae8f7b9400f107c533e745f835eb20f;p=agghoo.git First commit --- c5946158cae8f7b9400f107c533e745f835eb20f diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..00e2df3 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,33 @@ +Package: agghoo +Title: Aggregated Hold-out Cross Validation +Date: 2021-06-05 +Version: 0.1-0 +Description: The 'agghoo' procedure is an alternative to usual cross-validation. + Instead of choosing the best model trained on V subsamples, it determines + a winner model for each subsample, and then aggregate the V outputs. + For the details, see "Aggregated hold-out" by Guillaume Maillard, + Sylvain Arlot, Matthieu Lerasle (2021) + published in Journal of Machine Learning Research 22(20):1--55. +Author: Sylvain Arlot [cph,ctb], + Benjamin Auder [aut,cre,cph], + Melina Gallopin [cph,ctb], + Matthieu Lerasle [cph,ctb], + Guillaume Maillard [cph,ctb] +Maintainer: Benjamin Auder +Depends: + R (>= 3.5.0) +Imports: + R6, + caret, + rpart, + randomForest +Suggests: + roxygen2 +URL: https://git.auder.net/?p=agghoo.git +License: MIT + file LICENSE +RoxygenNote: 7.1.1 +Collate: + 'agghoo.R' + 'R6_Agghoo.R' + 'R6_Model.R' + 'A_NAMESPACE.R' diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6e92110 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021 +COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..7604bfd --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,6 @@ +# Generated by roxygen2: do not edit by hand + +export(Agghoo) +export(Model) +export(agghoo) +export(compareToStandard) diff --git a/R/A_NAMESPACE.R b/R/A_NAMESPACE.R new file mode 100644 index 0000000..0902c59 --- /dev/null +++ b/R/A_NAMESPACE.R @@ -0,0 +1,4 @@ +#' @include R6_Model.R +#' @include R6_Agghoo.R +#' @include agghoo.R +NULL diff --git a/R/R6_Agghoo.R b/R/R6_Agghoo.R new file mode 100644 index 0000000..3694715 --- /dev/null +++ b/R/R6_Agghoo.R @@ -0,0 +1,195 @@ +#' @title R6 class with agghoo functions fit() and predict(). +#' +#' @description +#' Class encapsulating the methods to run to obtain the best predictor +#' from the list of models (see 'Model' class). +#' +#' @export +Agghoo <- R6::R6Class("Agghoo", + public = list( + #' @description Create a new Agghoo object. + #' @param data Matrix or data.frame + #' @param target Vector of targets (generally numeric or factor) + #' @param task "regression" or "classification" + #' @param gmodel Generic model returning a predictive function + #' @param quality Function assessing the quality of a prediction; + #' quality(y1, y2) --> real number + initialize = function(data, target, task, gmodel, quality = NA) { + private$data <- data + private$target <- target + private$task <- task + private$gmodel <- gmodel + if (is.na(quality)) { + quality <- function(y1, y2) { + # NOTE: if classif output is a probability matrix, adapt. + if (task == "classification") + mean(y1 == y2) + else + atan(1.0 / (mean(abs(y1 - y2) + 0.01))) #experimental... + } + } + private$quality <- quality + }, + #' @description Fit an agghoo model. + #' @param CV List describing cross-validation to run. Slots: + #' - type: 'vfold' or 'MC' for Monte-Carlo (default: MC) + #' - V: number of runs (default: 10) + #' - test_size: percentage of data in the test dataset, for MC + #' (irrelevant for V-fold). Default: 0.2. + #' - shuffle: wether or not to shuffle data before V-fold. + #' Irrelevant for Monte-Carlo; default: TRUE + #' @param mode "agghoo" or "standard" (for usual cross-validation) + fit = function( + CV = list(type = "MC", + V = 10, + test_size = 0.2, + shuffle = TRUE), + mode="agghoo" + ) { + if (!is.list(CV)) + stop("CV: list of type, V, [test_size], [shuffle]") + n <- nrow(private$data) + shuffle_inds <- NA + if (CV$type == "vfold" && CV$shuffle) + shuffle_inds <- sample(n, n) + if (mode == "agghoo") { + vperfs <- list() + for (v in 1:CV$V) { + test_indices <- private$get_testIndices(CV, v, n, shuffle_inds) + vperf <- private$get_modelPerf(test_indices) + vperfs[[v]] <- vperf + } + private$run_res <- vperfs + } + else { + # Standard cross-validation + best_index = 0 + best_perf <- -1 + for (p in 1:private$gmodel$nmodels) { + tot_perf <- 0 + for (v in 1:CV$V) { + test_indices <- private$get_testIndices(CV, v, n, shuffle_inds) + perf <- private$get_modelPerf(test_indices, p) + tot_perf <- tot_perf + perf / CV$V + } + if (tot_perf > best_perf) { + # TODO: if ex-aequos: models list + choose at random + best_index <- p + best_perf <- tot_perf + } + } + best_model <- private$gmodel$get(private$data, private$target, best_index) + private$run_res <- list( list(model=best_model, perf=best_perf) ) + } + }, + #' @description Predict an agghoo model (after calling fit()) + #' @param X Matrix or data.frame to predict + #' @param weight "uniform" (default) or "quality" to weight votes or + #' average models performances (TODO: bad idea?!) + predict = function(X, weight="uniform") { + if (!is.list(private$run_res) || is.na(private$run_res)) { + print("Please call $fit() method first") + return + } + V <- length(private$run_res) + if (V == 1) + # Standard CV: + return (private$run_res[[1]]$model(X)) + # Agghoo: + if (weight == "uniform") + weights <- rep(1 / V, V) + else { + perfs <- sapply(private$run_res, function(item) item$perf) + perfs[perfs < 0] <- 0 #TODO: show a warning (with count of < 0...) + total_weight <- sum(perfs) #TODO: error if total_weight == 0 + weights <- perfs / total_weight + } + n <- nrow(X) + # TODO: detect if output = probs matrix for classif (in this case, adapt?) + # prediction agghoo "probabiliste" pour un nouveau x : + # argMax({ predict(m_v, x), v in 1..V }) ... + if (private$task == "classification") { + votes <- as.list(rep(NA, n)) + parse_numeric <- FALSE + } + else + preds <- matrix(0, nrow=n, ncol=V) + for (v in 1:V) { + predictions <- private$run_res[[v]]$model(X) + if (private$task == "regression") + preds <- cbind(preds, weights[v] * predictions) + else { + if (!parse_numeric && is.numeric(predictions)) + parse_numeric <- TRUE + for (i in 1:n) { + if (!is.list(votes[[i]])) + votes[[i]] <- list() + index <- as.character(predictions[i]) + if (is.null(votes[[i]][[index]])) + votes[[i]][[index]] <- 0 + votes[[i]][[index]] <- votes[[i]][[index]] + weights[v] + } + } + } + if (private$task == "regression") + return (rowSums(preds)) + res <- c() + for (i in 1:n) { + # TODO: if ex-aequos, random choice... + ind_max <- which.max(unlist(votes[[i]])) + pred_class <- names(votes[[i]])[ind_max] + if (parse_numeric) + pred_class <- as.numeric(pred_class) + res <- c(res, pred_class) + } + res + } + ), + private = list( + data = NA, + target = NA, + task = NA, + gmodel = NA, + quality = NA, + run_res = NA, + get_testIndices = function(CV, v, n, shuffle_inds) { + if (CV$type == "vfold") { + first_index = round((v-1) * n / CV$V) + 1 + last_index = round(v * n / CV$V) + test_indices = first_index:last_index + if (CV$shuffle) + test_indices <- shuffle_inds[test_indices] + } + else + test_indices = sample(n, round(n * CV$test_size)) + test_indices + }, + get_modelPerf = function(test_indices, p=0) { + getOnePerf <- function(p) { + model_pred <- private$gmodel$get(dataHO, targetHO, p) + prediction <- model_pred(testX) + perf <- private$quality(prediction, testY) + list(model=model_pred, perf=perf) + } + dataHO <- private$data[-test_indices,] + testX <- private$data[test_indices,] + targetHO <- private$target[-test_indices] + testY <- private$target[test_indices] + if (p >= 1) + # Standard CV: one model at a time + return (getOnePerf(p)$perf) + # Agghoo: loop on all models + best_model = NULL + best_perf <- -1 + for (p in 1:private$gmodel$nmodels) { + model_perf <- getOnePerf(p) + if (model_perf$perf > best_perf) { + # TODO: if ex-aequos: models list + choose at random + best_model <- model_perf$model + best_perf <- model_perf$perf + } + } + list(model=best_model, perf=best_perf) + } + ) +) diff --git a/R/R6_Model.R b/R/R6_Model.R new file mode 100644 index 0000000..9d7fc70 --- /dev/null +++ b/R/R6_Model.R @@ -0,0 +1,143 @@ +#' @title R6 class representing a (generic) model. +#' +#' @description +#' "Model" class, containing a (generic) learning function, which from +#' data + target [+ params] returns a prediction function X --> y. +#' Parameters for cross-validation are either provided or estimated. +#' Model family can be chosen among "rf", "tree", "ppr" and "knn" for now. +#' +#' @export +Model <- R6::R6Class("Model", + public = list( + #' @field nmodels Number of parameters (= number of [predictive] models) + nmodels = NA, + #' @description Create a new generic model. + #' @param data Matrix or data.frame + #' @param target Vector of targets (generally numeric or factor) + #' @param task "regression" or "classification" + #' @param gmodel Generic model returning a predictive function; chosen + #' automatically given data and target nature if not provided. + #' @param params List of parameters for cross-validation (each defining a model) + initialize = function(data, target, task, gmodel = NA, params = NA) { + if (is.na(gmodel)) { + # (Generic) model not provided + all_numeric <- is.numeric(as.matrix(data)) + if (!all_numeric) + # At least one non-numeric column: use random forests or trees + # TODO: 4 = arbitrary magic number... + gmodel = ifelse(ncol(data) >= 4, "rf", "tree") + else + # Numerical data + gmodel = ifelse(task == "regression", "ppr", "knn") + } + if (is.na(params)) + # Here, gmodel is a string (= its family), + # because a custom model must be given with its parameters. + params <- as.list(private$getParams(gmodel, data, target)) + private$params <- params + if (is.character(gmodel)) + gmodel <- private$getGmodel(gmodel, task) + private$gmodel <- gmodel + self$nmodels <- length(private$params) + }, + #' @description + #' Returns the model at index "index", trained on dataHO/targetHO. + #' index is between 1 and self$nmodels. + #' @param dataHO Matrix or data.frame + #' @param targetHO Vector of targets (generally numeric or factor) + #' @param index Index of the model in 1...nmodels + get = function(dataHO, targetHO, index) { + private$gmodel(dataHO, targetHO, private$params[[index]]) + } + ), + private = list( + # No need to expose model or parameters list + gmodel = NA, + params = NA, + # Main function: given a family, return a generic model, which in turn + # will output a predictive model from data + target + params. + getGmodel = function(family, task) { + if (family == "tree") { + function(dataHO, targetHO, param) { + require(rpart) + method <- ifelse(task == "classification", "class", "anova") + df <- data.frame(cbind(dataHO, target=targetHO)) + model <- rpart(target ~ ., df, method=method, control=list(cp=param)) + function(X) predict(model, X) + } + } + else if (family == "rf") { + function(dataHO, targetHO, param) { + require(randomForest) + if (task == "classification" && !is.factor(targetHO)) + targetHO <- as.factor(targetHO) + model <- randomForest::randomForest(dataHO, targetHO, mtry=param) + function(X) predict(model, X) + } + } + else if (family == "ppr") { + function(dataHO, targetHO, param) { + model <- stats::ppr(dataHO, targetHO, nterms=param) + function(X) predict(model, X) + } + } + else if (family == "knn") { + function(dataHO, targetHO, param) { + require(class) + function(X) class::knn(dataHO, X, cl=targetHO, k=param) + } + } + }, + # Return a default list of parameters, given a gmodel family + getParams = function(family, data, target) { + if (family == "tree") { + # Run rpart once to obtain a CV grid for parameter cp + require(rpart) + df <- data.frame(cbind(data, target=target)) + ctrl <- list( + minsplit = 2, + minbucket = 1, + maxcompete = 0, + maxsurrogate = 0, + usesurrogate = 0, + xval = 0, + surrogatestyle = 0, + maxdepth = 30) + r <- rpart(target ~ ., df, method="class", control=ctrl) + cps <- r$cptable[-1,1] + if (length(cps) <= 11) + return (cps) + step <- (length(cps) - 1) / 10 + cps[unique(round(seq(1, length(cps), step)))] + } + else if (family == "rf") { + p <- ncol(data) + # Use caret package to obtain the CV grid of mtry values + require(caret) + caret::var_seq(p, classification = (task == "classificaton"), + len = min(10, p-1)) + } + else if (family == "ppr") + # This is nterms in ppr() function + 1:10 + else if (family == "knn") { + n <- nrow(data) + # Choose ~10 NN values + K <- length(unique(target)) + if (n <= 10) + return (1:(n-1)) + sqrt_n <- sqrt(n) + step <- (2*sqrt_n - 1) / 10 + grid <- unique(round(seq(1, 2*sqrt_n, step))) + if (K == 2) { + # Common binary classification case: odd number of neighbors + for (i in 2:11) { + if (grid[i] %% 2 == 0) + grid[i] <- grid[i] + 1 #arbitrary choice + } + } + grid + } + } + ) +) diff --git a/R/agghoo.R b/R/agghoo.R new file mode 100644 index 0000000..4a25b17 --- /dev/null +++ b/R/agghoo.R @@ -0,0 +1,100 @@ +#' agghoo +#' +#' Run the agghoo procedure. (...) +#' +#' @param data Data frame or matrix containing the data in lines. +#' @param target The target values to predict. Generally a vector. +#' @param task "classification" or "regression". Default: +#' regression if target is numerical, classification otherwise. +#' @param gmodel A "generic model", which is a function returning a predict +#' function (taking X as only argument) from the tuple +#' (dataHO, targetHO, param), where 'HO' stands for 'Hold-Out', +#' referring to cross-validation. Cross-validation is run on an array +#' of 'param's. See params argument. Default: see R6::Model. +#' @param params A list of parameters. Often, one list cell is just a +#' numerical value, but in general it could be of any type. +#' Default: see R6::Model. +#' @param quality A function assessing the quality of a prediction. +#' Arguments are y1 and y2 (comparing a prediction to known values). +#' Default: see R6::Agghoo. +#' +#' @return An R6::Agghoo object. +#' +#' @examples +#' # Regression: +#' a_reg <- agghoo(iris[,-c(2,5)], iris[,2]) +#' a_reg$fit() +#' pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1)) +#' # Classification +#' a_cla <- agghoo(iris[,-5], iris[,5]) +#' a_cla$fit(mode="standard") +#' pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1)) +#' +#' @export +agghoo <- function(data, target, task = NA, gmodel = NA, params = NA, quality = NA) { + # Args check: + if (!is.data.frame(data) && !is.matrix(data)) + stop("data: data.frame or matrix") + if (nrow(data) <= 1 || any(dim(data) == 0)) + stop("data: non-empty, >= 2 rows") + if (!is.numeric(target) && !is.factor(target) && !is.character(target)) + stop("target: numeric, factor or character vector") + if (!is.na(task)) + task = match.arg(task, c("classification", "regression")) + if (is.character(gmodel)) + gmodel <- match.arg("knn", "ppr", "rf") + else if (!is.na(gmodel) && !is.function(gmodel)) + # No further checks here: fingers crossed :) + stop("gmodel: function(dataHO, targetHO, param) --> function(X) --> y") + if (is.numeric(params) || is.character(params)) + params <- as.list(params) + if (!is.na(params) && !is.list(params)) + stop("params: numerical, character, or list (passed to model)") + if (!is.na(gmodel) && !is.character(model) && is.na(params)) + stop("params must be provided when using a custom model") + if (is.na(gmodel) && !is.na(params)) + stop("model must be provided when using custom params") + if (!is.na(quality) && !is.function(quality)) + # No more checks here as well... TODO:? + stop("quality: function(y1, y2) --> Real") + + if (is.na(task)) { + if (is.numeric(target)) + task = "regression" + else + task = "classification" + } + # Build Model object (= list of parameterized models) + model <- Model$new(data, target, task, gmodel, params) + # Return Agghoo object, to run and predict + Agghoo$new(data, target, task, model, quality) +} + +#' compareToStandard +#' +#' Temporary function to compare agghoo to CV +#' (TODO: extended, in another file, more tests - when faster code). +#' +#' @export +compareToStandard <- function(df, t_idx, task = NA, rseed = -1) { + if (rseed >= 0) + set.seed(rseed) + if (is.na(task)) + task <- ifelse(is.numeric(df[,t_idx]), "regression", "classification") + n <- nrow(df) + test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) ) + a <- agghoo(df[-test_indices,-t_idx], df[-test_indices,t_idx], task) + a$fit(mode="agghoo") #default mode + pa <- a$predict(df[test_indices,-t_idx]) + print(paste("error agghoo", + ifelse(task == "classification", + mean(p != df[test_indices,t_idx]), + mean(abs(pa - df[test_indices,t_idx]))))) + # Compare with standard cross-validation: + a$fit(mode="standard") + ps <- a$predict(df[test_indices,-t_idx]) + print(paste("error CV", + ifelse(task == "classification", + mean(ps != df[test_indices,t_idx]), + mean(abs(ps - df[test_indices,t_idx]))))) +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..e932011 --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# agghoo + +R package for model selection based on aggregation. +Alternative to standard corss-validation. + +## Install the package + +R CMD INSTALL . + +## Use the package + +> library(agghoo) +> ?agghoo diff --git a/man/Agghoo.Rd b/man/Agghoo.Rd new file mode 100644 index 0000000..dc70db6 --- /dev/null +++ b/man/Agghoo.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6_Agghoo.R +\name{Agghoo} +\alias{Agghoo} +\title{R6 class with agghoo functions fit() and predict().} +\description{ +Class encapsulating the methods to run to obtain the best predictor +from the list of models (see 'Model' class). +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-new}{\code{Agghoo$new()}} +\item \href{#method-fit}{\code{Agghoo$fit()}} +\item \href{#method-predict}{\code{Agghoo$predict()}} +\item \href{#method-clone}{\code{Agghoo$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} +\subsection{Method \code{new()}}{ +Create a new Agghoo object. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Agghoo$new(data, target, task, gmodel, quality = NA)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{Matrix or data.frame} + +\item{\code{target}}{Vector of targets (generally numeric or factor)} + +\item{\code{task}}{"regression" or "classification"} + +\item{\code{gmodel}}{Generic model returning a predictive function} + +\item{\code{quality}}{Function assessing the quality of a prediction; +quality(y1, y2) --> real number} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-fit}{}}} +\subsection{Method \code{fit()}}{ +Fit an agghoo model. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Agghoo$fit( + CV = list(type = "MC", V = 10, test_size = 0.2, shuffle = TRUE), + mode = "agghoo" +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{CV}}{List describing cross-validation to run. Slots: +- type: 'vfold' or 'MC' for Monte-Carlo (default: MC) +- V: number of runs (default: 10) +- test_size: percentage of data in the test dataset, for MC + (irrelevant for V-fold). Default: 0.2. +- shuffle: wether or not to shuffle data before V-fold. + Irrelevant for Monte-Carlo; default: TRUE} + +\item{\code{mode}}{"agghoo" or "standard" (for usual cross-validation)} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-predict}{}}} +\subsection{Method \code{predict()}}{ +Predict an agghoo model (after calling fit()) +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Agghoo$predict(X, weight = "uniform")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Matrix or data.frame to predict} + +\item{\code{weight}}{"uniform" (default) or "quality" to weight votes or +average models performances (TODO: bad idea?!)} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Agghoo$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/Model.Rd b/man/Model.Rd new file mode 100644 index 0000000..a16f8ae --- /dev/null +++ b/man/Model.Rd @@ -0,0 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6_Model.R +\name{Model} +\alias{Model} +\title{R6 class representing a (generic) model.} +\description{ +"Model" class, containing a (generic) learning function, which from +data + target [+ params] returns a prediction function X --> y. +Parameters for cross-validation are either provided or estimated. +Model family can be chosen among "rf", "tree", "ppr" and "knn" for now. +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nmodels}}{Number of parameters (= number of [predictive] models)} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-new}{\code{Model$new()}} +\item \href{#method-get}{\code{Model$get()}} +\item \href{#method-clone}{\code{Model$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-new}{}}} +\subsection{Method \code{new()}}{ +Create a new generic model. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Model$new(data, target, task, gmodel = NA, params = NA)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{data}}{Matrix or data.frame} + +\item{\code{target}}{Vector of targets (generally numeric or factor)} + +\item{\code{task}}{"regression" or "classification"} + +\item{\code{gmodel}}{Generic model returning a predictive function; chosen +automatically given data and target nature if not provided.} + +\item{\code{params}}{List of parameters for cross-validation (each defining a model)} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-get}{}}} +\subsection{Method \code{get()}}{ +Returns the model at index "index", trained on dataHO/targetHO. +index is between 1 and self$nmodels. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Model$get(dataHO, targetHO, index)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{dataHO}}{Matrix or data.frame} + +\item{\code{targetHO}}{Vector of targets (generally numeric or factor)} + +\item{\code{index}}{Index of the model in 1...nmodels} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Model$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/agghoo.Rd b/man/agghoo.Rd new file mode 100644 index 0000000..dea76a1 --- /dev/null +++ b/man/agghoo.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/agghoo.R +\name{agghoo} +\alias{agghoo} +\title{agghoo} +\usage{ +agghoo(data, target, task = NA, gmodel = NA, params = NA, quality = NA) +} +\arguments{ +\item{data}{Data frame or matrix containing the data in lines.} + +\item{target}{The target values to predict. Generally a vector.} + +\item{task}{"classification" or "regression". Default: +regression if target is numerical, classification otherwise.} + +\item{gmodel}{A "generic model", which is a function returning a predict +function (taking X as only argument) from the tuple +(dataHO, targetHO, param), where 'HO' stands for 'Hold-Out', +referring to cross-validation. Cross-validation is run on an array +of 'param's. See params argument. Default: see R6::Model.} + +\item{params}{A list of parameters. Often, one list cell is just a +numerical value, but in general it could be of any type. +Default: see R6::Model.} + +\item{quality}{A function assessing the quality of a prediction. +Arguments are y1 and y2 (comparing a prediction to known values). +Default: see R6::Agghoo.} +} +\value{ +An R6::Agghoo object. +} +\description{ +Run the agghoo procedure. (...) +} +\examples{ +# Regression: +a_reg <- agghoo(iris[,-c(2,5)], iris[,2]) +a_reg$fit() +pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1)) +# Classification +a_cla <- agghoo(iris[,-5], iris[,5]) +a_cla$fit(mode="standard") +pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1)) + +} diff --git a/man/compareToStandard.Rd b/man/compareToStandard.Rd new file mode 100644 index 0000000..5787de9 --- /dev/null +++ b/man/compareToStandard.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/agghoo.R +\name{compareToStandard} +\alias{compareToStandard} +\title{compareToStandard} +\usage{ +compareToStandard(df, t_idx, task = NA, rseed = -1) +} +\description{ +Temporary function to compare agghoo to CV +(TODO: extended, in another file, more tests - when faster code). +}