Package: agghoo
-Title: Aggregated Hold-out Cross Validation
-Date: 2021-06-05
+Title: Aggregated Hold-Out Cross Validation
+Date: 2022-08-30
Version: 0.1-0
Description: The 'agghoo' procedure is an alternative to usual cross-validation.
Instead of choosing the best model trained on V subsamples, it determines
Depends:
R (>= 3.5.0)
Imports:
+ class,
+ parallel,
R6,
rpart,
- randomForest,
FNN
Suggests:
roxygen2
URL: https://git.auder.net/?p=agghoo.git
License: MIT + file LICENSE
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.1
Collate:
'compareTo.R'
'agghoo.R'
-YEAR: 2021
+YEAR: 2021-2022
COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard
# Generated by roxygen2: do not edit by hand
export(AgghooCV)
-export(CVvoting_run)
export(Model)
export(agghoo)
-export(agghoo_run)
export(compareMulti)
export(compareRange)
export(compareTo)
-export(standardCV_run)
importFrom(FNN,knn.reg)
importFrom(R6,R6Class)
importFrom(class,knn)
getGmodel = function(family, task) {
if (family == "tree") {
function(dataHO, targetHO, param) {
- require(rpart)
+ base::require(rpart)
method <- ifelse(task == "classification", "class", "anova")
if (is.null(colnames(dataHO)))
colnames(dataHO) <- paste0("V", 1:ncol(dataHO))
else if (family == "knn") {
if (task == "classification") {
function(dataHO, targetHO, param) {
- require(class)
+ base::require(class)
function(X) class::knn(dataHO, X, cl=targetHO, k=param)
}
}
else {
function(dataHO, targetHO, param) {
- require(FNN)
+ base::require(FNN)
function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred
}
}
getParams = function(family, data, target, task) {
if (family == "tree") {
# Run rpart once to obtain a CV grid for parameter cp
- require(rpart)
+ base::require(rpart)
df <- data.frame(cbind(data, target=target))
ctrl <- list(
cp = 0,
#' Run and eval the standard cross-validation procedure.
#' Parameters are rather explicit except "floss", which corresponds to the
#' "final" loss function, applied to compute the error on testing dataset.
-#'
-#' @export
standardCV_run <- function(
dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
) {
#' Run and eval the voting cross-validation procedure.
#' Parameters are rather explicit except "floss", which corresponds to the
#' "final" loss function, applied to compute the error on testing dataset.
-#'
-#' @export
CVvoting_run <- function(
dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
) {
#' Run and eval the agghoo procedure.
#' Parameters are rather explicit except "floss", which corresponds to the
#' "final" loss function, applied to compute the error on testing dataset.
-#'
-#' @export
agghoo_run <- function(
dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
) {
compareMulti <- function(
data, target, method_s, N=100, nc=NA, floss=NULL, verbose=TRUE, ...
) {
- require(parallel)
+ base::require(parallel)
if (is.na(nc))
nc <- parallel::detectCores()
--- /dev/null
+Package: agghoo
+Title: Aggregated Hold-Out Cross Validation
+Date: 2022-08-30
+Version: 0.1-0
+Description: The 'agghoo' procedure is an alternative to usual cross-validation.
+ Instead of choosing the best model trained on V subsamples, it determines
+  a winner model for each subsample, and then aggregates the V outputs.
+ For the details, see "Aggregated hold-out" by Guillaume Maillard,
+ Sylvain Arlot, Matthieu Lerasle (2021) <arXiv:1909.04890>
+ published in Journal of Machine Learning Research 22(20):1--55.
+Author: Sylvain Arlot <sylvain.arlot@universite-paris-saclay.fr> [cph,ctb],
+ Benjamin Auder <benjamin.auder@universite-paris-saclay.fr> [aut,cre,cph],
+ Melina Gallopin <melina.gallopin@universite-paris-saclay.fr> [cph,ctb],
+ Matthieu Lerasle <matthieu.lerasle@universite-paris-saclay.fr> [cph,ctb],
+ Guillaume Maillard <guillaume.maillard@uni.lu> [cph,ctb]
+Maintainer: Benjamin Auder <benjamin.auder@universite-paris-saclay.fr>
+Depends: R (>= 3.5.0)
+Imports: class, parallel, R6, rpart, FNN
+Suggests: roxygen2
+URL: https://git.auder.net/?p=agghoo.git
+License: MIT + file LICENSE
+RoxygenNote: 7.2.1
+Collate: 'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R'
+ 'checks.R' 'utils.R' 'A_NAMESPACE.R'
+NeedsCompilation: no
+Packaged: 2022-09-09 15:45:56 UTC; auder
--- /dev/null
+YEAR: 2021-2022
+COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard
--- /dev/null
+# Generated by roxygen2: do not edit by hand
+
+export(AgghooCV)
+export(Model)
+export(agghoo)
+export(compareMulti)
+export(compareRange)
+export(compareTo)
+importFrom(FNN,knn.reg)
+importFrom(R6,R6Class)
+importFrom(class,knn)
+importFrom(rpart,rpart)
+importFrom(stats,ppr)
--- /dev/null
+#' @include utils.R
+#' @include checks.R
+#' @include R6_Model.R
+#' @include R6_AgghooCV.R
+#' @include agghoo.R
+#' @include compareTo.R
+NULL
--- /dev/null
+#' @title R6 class with agghoo functions fit() and predict().
+#'
+#' @description
+#' Class encapsulating the methods to run to obtain the best predictor
+#' from the list of models (see 'Model' class).
+#'
+#' @importFrom R6 R6Class
+#'
+#' @export
+AgghooCV <- R6::R6Class("AgghooCV",
+ public = list(
+ #' @description Create a new AgghooCV object.
+ #' @param data Matrix or data.frame
+ #' @param target Vector of targets (generally numeric or factor)
+ #' @param task "regression" or "classification".
+ #' Default: classification if target not numeric.
+ #' @param gmodel Generic model returning a predictive function
+ #' Default: tree if mixed data, knn/ppr otherwise.
+ #' @param loss Function assessing the error of a prediction
+ #' Default: error rate or mean(abs(error)).
+ initialize = function(data, target, task, gmodel, loss) {
+ private$data <- data
+ private$target <- target
+ private$task <- task
+ private$gmodel <- gmodel
+ private$loss <- loss
+ },
+ #' @description Fit an agghoo model.
+ #' @param CV List describing cross-validation to run. Slots: \cr
+ #' - type: 'vfold' or 'MC' for Monte-Carlo (default: MC) \cr
+ #' - V: number of runs (default: 10) \cr
+    #'   - test_size: proportion of data in the test dataset, for MC
+ #' (irrelevant for V-fold). Default: 0.2. \cr
+    #'   - shuffle: whether or not to shuffle data before V-fold.
+ #' Irrelevant for Monte-Carlo; default: TRUE \cr
+ #' Default (if NULL): type="MC", V=10, test_size=0.2
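+    #'   Example: CV=list(type="vfold", V=5, shuffle=FALSE)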
+ fit = function(CV = NULL) {
+ CV <- checkCV(CV)
+ n <- nrow(private$data)
+ shuffle_inds <- NULL
+ if (CV$type == "vfold" && CV$shuffle)
+ shuffle_inds <- sample(n, n)
+ # Result: list of V predictive models (+ parameters for info)
+ private$pmodels <- list()
+ for (v in seq_len(CV$V)) {
+ # Prepare train / test data and target, from full dataset.
+ # dataHO: "data Hold-Out" etc.
+ test_indices <- get_testIndices(n, CV, v, shuffle_inds)
+ d <- splitTrainTest(private$data, private$target, test_indices)
+ best_model <- NULL
+ best_error <- Inf
+ for (p in seq_len(private$gmodel$nmodels)) {
+ model_pred <- private$gmodel$get(d$dataTrain, d$targetTrain, p)
+ prediction <- model_pred(d$dataTest)
+ error <- private$loss(prediction, d$targetTest)
+ if (error <= best_error) {
+ newModel <- list(model=model_pred, param=private$gmodel$getParam(p))
+ if (error == best_error)
+ best_model[[length(best_model)+1]] <- newModel
+ else {
+ best_model <- list(newModel)
+ best_error <- error
+ }
+ }
+ }
+      # Choose a model at random in case of ties
+ private$pmodels[[v]] <- best_model[[ sample(length(best_model),1) ]]
+ }
+ },
+ #' @description Predict an agghoo model (after calling fit())
+ #' @param X Matrix or data.frame to predict
+ predict = function(X) {
+ if (!is.matrix(X) && !is.data.frame(X))
+ stop("X: matrix or data.frame")
+ if (!is.list(private$pmodels)) {
+ print("Please call $fit() method first")
+ return (invisible(NULL))
+ }
+ V <- length(private$pmodels)
+ oneLineX <- X[1,]
+ if (is.matrix(X))
+ # HACK: R behaves differently with data frames and matrices.
+ oneLineX <- t(as.matrix(oneLineX))
+ if (length(private$pmodels[[1]]$model(oneLineX)) >= 2)
+ # Soft classification:
+ return (Reduce("+", lapply(private$pmodels, function(m) m$model(X))) / V)
+ n <- nrow(X)
+ all_predictions <- as.data.frame(matrix(nrow=n, ncol=V))
+ for (v in 1:V)
+ all_predictions[,v] <- private$pmodels[[v]]$model(X)
+ if (private$task == "regression")
+ # Easy case: just average each row
+ return (rowMeans(all_predictions))
+ # "Hard" classification:
+ apply(all_predictions, 1, function(row) {
+ t <- table(row)
+ # Next lines in case of ties (broken at random)
+ tmax <- max(t)
+ sample( names(t)[which(t == tmax)], 1 )
+ })
+ },
+ #' @description Return the list of V best parameters (after calling fit())
+ getParams = function() {
+ lapply(private$pmodels, function(m) m$param)
+ }
+ ),
+ private = list(
+ data = NULL,
+ target = NULL,
+ task = NULL,
+ gmodel = NULL,
+ loss = NULL,
+ pmodels = NULL
+ )
+)
--- /dev/null
+#' @title R6 class representing a (generic) model.
+#'
+#' @description
+#' "Model" class, containing a (generic) learning function, which from
+#' data + target [+ params] returns a prediction function X --> y.
+#' Parameters for cross-validation are either provided or estimated.
+#' Model family can be chosen among "tree", "ppr" and "knn" for now.
+#'
+#' @importFrom FNN knn.reg
+#' @importFrom class knn
+#' @importFrom stats ppr
+#' @importFrom rpart rpart
+#'
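+#' @examples
+#' # Sketch: build a Model from the built-in "knn" family, then train the
+#' # first parameterized model and predict (indices range over 1..nmodels).
+#' \dontrun{
+#' m <- Model$new(iris[,-5], iris[,5], "classification", gmodel="knn")
+#' predict_fn <- m$get(iris[,-5], iris[,5], 1)
+#' head(predict_fn(iris[,-5]))
+#' }
+#'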
+#' @export
+Model <- R6::R6Class("Model",
+ public = list(
+ #' @field nmodels Number of parameters (= number of [predictive] models)
+ nmodels = NA,
+ #' @description Create a new generic model.
+ #' @param data Matrix or data.frame
+ #' @param target Vector of targets (generally numeric or factor)
+ #' @param task "regression" or "classification"
+ #' @param gmodel Generic model returning a predictive function; chosen
+ #' automatically given data and target nature if not provided.
+ #' @param params List of parameters for cross-validation (each defining a model)
+ initialize = function(data, target, task, gmodel = NULL, params = NULL) {
+ if (is.null(gmodel)) {
+ # (Generic) model not provided
+ all_numeric <- is.numeric(as.matrix(data))
+ if (!all_numeric)
+ # At least one non-numeric column: use trees
+        gmodel <- "tree"
+      else
+        # Numerical data
+        gmodel <- ifelse(task == "regression", "ppr", "knn")
+ }
+ if (is.null(params))
+ # Here, gmodel is a string (= its family),
+ # because a custom model must be given with its parameters.
+ params <- as.list(private$getParams(gmodel, data, target, task))
+ private$params <- params
+ if (is.character(gmodel))
+ gmodel <- private$getGmodel(gmodel, task)
+ private$gmodel <- gmodel
+ self$nmodels <- length(private$params)
+ },
+ #' @description
+ #' Returns the model at index "index", trained on dataHO/targetHO.
+ #' @param dataHO Matrix or data.frame
+ #' @param targetHO Vector of targets (generally numeric or factor)
+ #' @param index Index of the model in 1...nmodels
+ get = function(dataHO, targetHO, index) {
+ private$gmodel(dataHO, targetHO, private$params[[index]])
+ },
+ #' @description
+ #' Returns the parameter at index "index".
+ #' @param index Index of the model in 1...nmodels
+ getParam = function(index) {
+ private$params[[index]]
+ }
+ ),
+ private = list(
+ # No need to expose model or parameters list
+ gmodel = NULL,
+ params = NULL,
+ # Main function: given a family, return a generic model, which in turn
+ # will output a predictive model from data + target + params.
+ getGmodel = function(family, task) {
+ if (family == "tree") {
+ function(dataHO, targetHO, param) {
+ base::require(rpart)
+ method <- ifelse(task == "classification", "class", "anova")
+ if (is.null(colnames(dataHO)))
+ colnames(dataHO) <- paste0("V", 1:ncol(dataHO))
+ df <- data.frame(cbind(dataHO, target=targetHO))
+ model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param))
+ if (task == "regression")
+ type <- "vector"
+ else {
+ if (is.null(dim(targetHO)))
+ type <- "class"
+ else
+ type <- "prob"
+ }
+ function(X) {
+ if (is.null(colnames(X)))
+ colnames(X) <- paste0("V", 1:ncol(X))
+ predict(model, as.data.frame(X), type=type)
+ }
+ }
+ }
+ else if (family == "ppr") {
+ function(dataHO, targetHO, param) {
+ model <- stats::ppr(dataHO, targetHO, nterms=param)
+ function(X) predict(model, X)
+ }
+ }
+ else if (family == "knn") {
+ if (task == "classification") {
+ function(dataHO, targetHO, param) {
+ base::require(class)
+ function(X) class::knn(dataHO, X, cl=targetHO, k=param)
+ }
+ }
+ else {
+ function(dataHO, targetHO, param) {
+ base::require(FNN)
+ function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred
+ }
+ }
+ }
+ },
+ # Return a default list of parameters, given a gmodel family
+ getParams = function(family, data, target, task) {
+ if (family == "tree") {
+ # Run rpart once to obtain a CV grid for parameter cp
+ base::require(rpart)
+ df <- data.frame(cbind(data, target=target))
+ ctrl <- list(
+ cp = 0,
+ minsplit = 2,
+ minbucket = 1,
+ xval = 0)
+ method <- ifelse(task == "classification", "class", "anova")
+      r <- rpart::rpart(target ~ ., df, method=method, control=ctrl)
+ cps <- r$cptable[-1,1]
+ if (length(cps) <= 1)
+ stop("No cross-validation possible: select another model")
+ if (length(cps) <= 11)
+ return (cps)
+ step <- (length(cps) - 1) / 10
+ cps[unique(round(seq(1, length(cps), step)))]
+ }
+ else if (family == "ppr")
+      # This is the 'nterms' argument of the stats::ppr() function
+ 1:10
+ else if (family == "knn") {
+ n <- nrow(data)
+ # Choose ~10 NN values
+ K <- length(unique(target))
+ if (n <= 10)
+ return (1:(n-1))
+ sqrt_n <- sqrt(n)
+ step <- (2*sqrt_n - 1) / 10
+ grid <- unique(round(seq(1, 2*sqrt_n, step)))
+ if (K == 2) {
+ # Common binary classification case: odd number of neighbors
+        # grid may hold fewer than 11 values after unique()
+        for (i in 2:length(grid)) {
+          if (grid[i] %% 2 == 0)
+            grid[i] <- grid[i] + 1 # arbitrary choice
+ }
+ }
+ grid
+ }
+ }
+ )
+)
--- /dev/null
+#' agghoo
+#'
+#' Run the (core) agghoo procedure.
+#' Arguments specify the list of models, their parameters and the
+#' cross-validation settings, among others.
+#'
+#' @param data Data frame or matrix containing the data in lines.
+#' @param target The target values to predict. Generally a vector,
+#' but possibly a matrix in the case of "soft classification".
+#' @param task "classification" or "regression". Default:
+#' regression if target is numerical, classification otherwise.
+#' @param gmodel A "generic model", which is a function returning a predict
+#' function (taking X as only argument) from the tuple
+#' (dataHO, targetHO, param), where 'HO' stands for 'Hold-Out',
+#' referring to cross-validation. Cross-validation is run on an array
+#' of 'param's. See params argument. Default: see R6::Model.
+#' @param params A list of parameters. Often, one list cell is just a
+#' numerical value, but in general it could be of any type.
+#' Default: see R6::Model.
+#' @param loss A function assessing the error of a prediction.
+#' Arguments are y1 and y2 (comparing a prediction to known values).
+#' loss(y1, y2) --> real number (error). Default: see R6::AgghooCV.
+#'
+#' @return
+#' An R6::AgghooCV object o. Then, call o$fit() and finally o$predict(newData)
+#'
+#' @examples
+#' # Regression:
+#' a_reg <- agghoo(iris[,-c(2,5)], iris[,2])
+#' a_reg$fit()
+#' pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1))
+#' # Classification
+#' a_cla <- agghoo(iris[,-5], iris[,5])
+#' a_cla$fit()
+#' pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1))
+#'
+#' @seealso Function \code{\link{compareTo}}
+#'
+#' @references
+#' Guillaume Maillard, Sylvain Arlot, Matthieu Lerasle. "Aggregated hold-out".
+#' Journal of Machine Learning Research 22(20):1--55, 2021.
+#'
+#' @export
+agghoo <- function(
+ data, target, task = NULL, gmodel = NULL, params = NULL, loss = NULL
+) {
+ # Args check:
+ checkDaTa(data, target)
+ task <- checkTask(task, target)
+ modPar <- checkModPar(gmodel, params)
+ loss <- checkLoss(loss, task)
+
+ # Build Model object (= list of parameterized models)
+ model <- Model$new(data, target, task, modPar$gmodel, modPar$params)
+
+ # Return AgghooCV object, to run and predict
+ AgghooCV$new(data, target, task, model, loss)
+}
--- /dev/null
+# Internal usage: check and fill arguments with default values.
+
+defaultLoss_classif <- function(y1, y2) {
+ if (is.null(dim(y1)))
+ # Standard case: "hard" classification
+ mean(y1 != y2)
+ else {
+ # "Soft" classification: predict() outputs a probability matrix
+ # In this case "target" could be in matrix form.
+ if (!is.null(dim(y2)))
+ mean(rowSums(abs(y1 - y2)))
+ else {
+ # Or not: y2 is a "factor".
+ y2 <- as.character(y2)
+ # NOTE: the user should provide target in matrix form because
+ # matching y2 with columns is rather inefficient!
+ names <- colnames(y1)
+ positions <- list()
+ for (idx in seq_along(names))
+ positions[[ names[idx] ]] <- idx
+      mean(vapply(
+        seq_along(y2),
+        function(idx) {
+          # Compare the predicted row to the one-hot encoding of y2[idx]
+          onehot <- rep(0, ncol(y1))
+          onehot[ positions[[ y2[idx] ]] ] <- 1
+          sum(abs(y1[idx,] - onehot))
+        }, 0))
+ }
+ }
+}
+
+defaultLoss_regress <- function(y1, y2) {
+ mean(abs(y1 - y2))
+}
+
+# TODO: allow strings like "MSE", "abs" etc
+checkLoss <- function(loss, task) {
+ if (!is.null(loss) && !is.function(loss))
+ stop("loss: function(y1, y2) --> Real")
+ if (is.null(loss)) {
+ loss <- if (task == "classification") {
+ defaultLoss_classif
+ } else {
+ defaultLoss_regress
+ }
+ }
+ loss
+}
+
+checkCV <- function(CV) {
+ if (is.null(CV))
+ CV <- list(type="MC", V=10, test_size=0.2, shuffle=TRUE)
+ else {
+ if (!is.list(CV))
+      stop("CV: list with slots type ('MC'|'vfold'), V (integer) [, test_size, shuffle]")
+ if (is.null(CV$type)) {
+ warning("CV$type not provided: set to MC")
+ CV$type <- "MC"
+ }
+ if (is.null(CV$V)) {
+ warning("CV$V not provided: set to 10")
+ CV$V <- 10
+ }
+ if (CV$type == "MC" && is.null(CV$test_size))
+ CV$test_size <- 0.2
+ if (CV$type == "vfold" && is.null(CV$shuffle))
+ CV$shuffle <- TRUE
+ }
+ CV
+}
+
+checkDaTa <- function(data, target) {
+ if (!is.data.frame(data) && !is.matrix(data))
+ stop("data: data.frame or matrix")
+ if (is.data.frame(target) || is.matrix(target)) {
+ if (!is.numeric(target))
+ stop("multi-columns target must be a probability matrix")
+ if (nrow(target) != nrow(data) || ncol(target) == 1)
+ stop("target probability matrix does not match data size")
+ }
+ else if (!is.numeric(target) && !is.factor(target) && !is.character(target))
+ stop("target: numeric, factor or character vector")
+}
+
+checkTask <- function(task, target) {
+  if (!is.null(task))
+    # Honor the user-supplied task instead of silently discarding it
+    return (match.arg(task, c("classification", "regression")))
+  ifelse(is.numeric(target), "regression", "classification")
+}
+
+checkModPar <- function(gmodel, params) {
+ if (is.character(gmodel))
+    gmodel <- match.arg(gmodel, c("knn", "ppr", "tree"))
+ else if (!is.null(gmodel) && !is.function(gmodel))
+ stop("gmodel: function(dataHO, targetHO, param) --> function(X) --> y")
+ if (is.numeric(params) || is.character(params))
+ params <- as.list(params)
+ if (!is.list(params) && !is.null(params))
+ stop("params: numerical, character, or list (passed to model)")
+ if (is.function(gmodel) && !is.list(params))
+ stop("params must be provided when using a custom model")
+ if (is.list(params) && is.null(gmodel))
+ stop("model (or family) must be provided when using custom params")
+ list(gmodel=gmodel, params=params)
+}
--- /dev/null
+#' standardCV_core
+#'
+#' Cross-validation method, added here as an example.
+#' Parameters are described in ?agghoo and ?AgghooCV
+standardCV_core <- function(data, target, task, gmodel, params, loss, CV) {
+ n <- nrow(data)
+ shuffle_inds <- NULL
+ if (CV$type == "vfold" && CV$shuffle)
+ shuffle_inds <- sample(n, n)
+ list_testinds <- list()
+ for (v in seq_len(CV$V))
+ list_testinds[[v]] <- get_testIndices(n, CV, v, shuffle_inds)
+ gmodel <- agghoo::Model$new(data, target, task, gmodel, params)
+ best_error <- Inf
+ best_p <- NULL
+ for (p in seq_len(gmodel$nmodels)) {
+ error <- Reduce('+', lapply(seq_len(CV$V), function(v) {
+ testIdx <- list_testinds[[v]]
+ d <- splitTrainTest(data, target, testIdx)
+ model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p)
+ prediction <- model_pred(d$dataTest)
+ loss(prediction, d$targetTest)
+ }) )
+ if (error <= best_error) {
+ if (error == best_error)
+ best_p[[length(best_p)+1]] <- p
+ else {
+ best_p <- list(p)
+ best_error <- error
+ }
+ }
+ }
+ chosenP <- best_p[[ sample(length(best_p), 1) ]]
+ list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP))
+}
+
+#' CVvoting_core
+#'
+#' "voting" cross-validation method, added here as an example.
+#' Parameters are described in ?agghoo and ?AgghooCV
+CVvoting_core <- function(data, target, task, gmodel, params, loss, CV) {
+ CV <- checkCV(CV)
+ n <- nrow(data)
+ shuffle_inds <- NULL
+ if (CV$type == "vfold" && CV$shuffle)
+ shuffle_inds <- sample(n, n)
+ gmodel <- agghoo::Model$new(data, target, task, gmodel, params)
+ bestP <- rep(0, gmodel$nmodels)
+ for (v in seq_len(CV$V)) {
+ test_indices <- get_testIndices(n, CV, v, shuffle_inds)
+ d <- splitTrainTest(data, target, test_indices)
+ best_p <- NULL
+ best_error <- Inf
+ for (p in seq_len(gmodel$nmodels)) {
+ model_pred <- gmodel$get(d$dataTrain, d$targetTrain, p)
+ prediction <- model_pred(d$dataTest)
+ error <- loss(prediction, d$targetTest)
+ if (error <= best_error) {
+ if (error == best_error)
+ best_p[[length(best_p)+1]] <- p
+ else {
+ best_p <- list(p)
+ best_error <- error
+ }
+ }
+ }
+ for (p in best_p)
+ bestP[p] <- bestP[p] + 1
+ }
+  # Choose a param at random in case of ties:
+ maxP <- max(bestP)
+ chosenP <- sample(which(bestP == maxP), 1)
+ list(model=gmodel$get(data, target, chosenP), param=gmodel$getParam(chosenP))
+}
+
+#' standardCV_run
+#'
+#' Run and eval the standard cross-validation procedure.
+#' Parameters are rather explicit except "floss", which corresponds to the
+#' "final" loss function, applied to compute the error on testing dataset.
+standardCV_run <- function(
+ dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
+) {
+ args <- list(...)
+ task <- checkTask(args$task, targetTrain)
+ modPar <- checkModPar(args$gmodel, args$params)
+ loss <- checkLoss(args$loss, task)
+ CV <- checkCV(args$CV)
+ s <- standardCV_core(
+ dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV)
+ if (verbose)
+ print(paste( "Parameter:", s$param ))
+ p <- s$model(dataTest)
+ err <- floss(p, targetTest)
+ if (verbose)
+ print(paste("error CV:", err))
+ invisible(err)
+}
+
+#' CVvoting_run
+#'
+#' Run and eval the voting cross-validation procedure.
+#' Parameters are rather explicit except "floss", which corresponds to the
+#' "final" loss function, applied to compute the error on testing dataset.
+CVvoting_run <- function(
+ dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
+) {
+ args <- list(...)
+ task <- checkTask(args$task, targetTrain)
+ modPar <- checkModPar(args$gmodel, args$params)
+ loss <- checkLoss(args$loss, task)
+ CV <- checkCV(args$CV)
+ s <- CVvoting_core(
+ dataTrain, targetTrain, task, modPar$gmodel, modPar$params, loss, CV)
+ if (verbose)
+ print(paste( "Parameter:", s$param ))
+ p <- s$model(dataTest)
+ err <- floss(p, targetTest)
+ if (verbose)
+ print(paste("error CV:", err))
+ invisible(err)
+}
+
+#' agghoo_run
+#'
+#' Run and eval the agghoo procedure.
+#' Parameters are rather explicit except "floss", which corresponds to the
+#' "final" loss function, applied to compute the error on testing dataset.
+agghoo_run <- function(
+ dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...
+) {
+ args <- list(...)
+ CV <- checkCV(args$CV)
+ # Must remove CV arg, or agghoo will complain "error: unused arg"
+ args$CV <- NULL
+ a <- do.call(agghoo, c(list(data=dataTrain, target=targetTrain), args))
+ a$fit(CV)
+ if (verbose) {
+ print("Parameters:")
+ print(unlist(a$getParams()))
+ }
+ pa <- a$predict(dataTest)
+ err <- floss(pa, targetTest)
+ if (verbose)
+ print(paste("error agghoo:", err))
+ invisible(err)
+}
+
+#' compareTo
+#'
+#' Compare a list of learning methods (or run only one), on data/target.
+#'
+#' @param data Data matrix or data.frame
+#' @param target Target vector (generally numeric or factor)
+#' @param method_s Either a single function, or a list
+#' (examples: agghoo_run, standardCV_run)
+#' @param rseed Seed of the random generator (-1 means "random seed")
+#' @param floss Loss function to compute the error on testing dataset.
+#' @param verbose TRUE to request methods to be verbose.
+#' @param ... arguments passed to method_s function(s)
+#'
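+#' @examples
+#' \dontrun{
+#' # Sketch: compare agghoo and standard CV on iris (as in the test script)
+#' compareTo(iris[,-5], iris[,5], list(agghoo_run, standardCV_run))
+#' }
+#'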
+#' @export
+compareTo <- function(
+ data, target, method_s, rseed=-1, floss=NULL, verbose=TRUE, ...
+) {
+ if (rseed >= 0)
+ set.seed(rseed)
+ n <- nrow(data)
+ test_indices <- sample( n, round(n / ifelse(n >= 500, 10, 5)) )
+ d <- splitTrainTest(data, target, test_indices)
+
+ # Set error function to be used on model outputs (not in core method)
+ task <- checkTask(list(...)$task, target)
+ if (is.null(floss)) {
+    floss <- function(y1, y2) {
+      if (task == "classification") mean(y1 != y2) else mean(abs(y1 - y2))
+    }
+ }
+
+ # Run (and compare) all methods:
+ runOne <- function(o) {
+ o(d$dataTrain, d$dataTest, d$targetTrain, d$targetTest, floss, verbose, ...)
+ }
+ errors <- c()
+ if (is.list(method_s))
+ errors <- sapply(method_s, runOne)
+ else if (is.function(method_s))
+ errors <- runOne(method_s)
+ invisible(errors)
+}
+
+#' compareMulti
+#'
+#' Run compareTo N times in parallel.
+#'
+#' @inheritParams compareTo
+#' @param N Number of calls to method(s)
+#' @param nc Number of cores. Set to parallel::detectCores() if undefined.
+#' Set it to any value <=1 to say "no parallelism".
+#' @param verbose TRUE to print task numbers and "Errors:" in the end.
+#'
+#' @export
+compareMulti <- function(
+ data, target, method_s, N=100, nc=NA, floss=NULL, verbose=TRUE, ...
+) {
+ base::require(parallel)
+ if (is.na(nc))
+ nc <- parallel::detectCores()
+
+ # "One" comparison for each method in method_s (list)
+ compareOne <- function(n) {
+ if (verbose)
+ print(n)
+ compareTo(data, target, method_s, n, floss, verbose=FALSE, ...)
+ }
+
+ errors <- if (nc >= 2) {
+ parallel::mclapply(1:N, compareOne, mc.cores = nc)
+ } else {
+ lapply(1:N, compareOne)
+ }
+ if (verbose)
+ print("Errors:")
+ Reduce('+', errors) / N
+}
+
+#' compareRange
+#'
+#' Run compareMulti on several values of the parameter V.
+#'
+#' @inheritParams compareMulti
+#' @param V_range Values of V to be tested.
+#'
+#' @export
+compareRange <- function(
+ data, target, method_s, N=100, nc=NA, floss=NULL, V_range=c(10,15,20), ...
+) {
+ args <- list(...)
+ # Avoid warnings if V is left unspecified:
+ CV <- suppressWarnings( checkCV(args$CV) )
+ errors <- lapply(V_range, function(V) {
+ args$CV$V <- V
+ do.call(compareMulti, c(list(data=data, target=target, method_s=method_s,
+      N=N, nc=nc, floss=floss, verbose=FALSE), args))
+ })
+ print(paste(V_range, errors))
+}
--- /dev/null
+# Helper for cross-validation: return the next test indices.
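+# For "vfold", fold v takes the v-th slice of 1..n; e.g. with n=10, V=5:
+# v=1 --> 1:2, v=2 --> 3:4, ..., v=5 --> 9:10 (shuffled if requested).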
+get_testIndices <- function(n, CV, v, shuffle_inds) {
+ if (CV$type == "vfold") {
+    # Slice indices (optionally shuffled)
+    first_index <- round((v-1) * n / CV$V) + 1
+    last_index <- round(v * n / CV$V)
+    test_indices <- first_index:last_index
+ if (!is.null(shuffle_inds))
+ test_indices <- shuffle_inds[test_indices]
+ }
+ else
+ # Monte-Carlo cross-validation
+    test_indices <- sample(n, round(n * CV$test_size))
+ test_indices
+}
+
+# Helper which splits data into training and testing parts.
+splitTrainTest <- function(data, target, testIdx) {
+ dataTrain <- data[-testIdx,]
+ targetTrain <- target[-testIdx]
+ dataTest <- data[testIdx,]
+ targetTest <- target[testIdx]
+ # [HACK] R will cast 1-dim matrices into vectors:
+ if (!is.matrix(dataTrain) && !is.data.frame(dataTrain))
+ dataTrain <- as.matrix(dataTrain)
+ if (!is.matrix(dataTest) && !is.data.frame(dataTest))
+ dataTest <- as.matrix(dataTest)
+ list(dataTrain=dataTrain, targetTrain=targetTrain,
+ dataTest=dataTest, targetTest=targetTest)
+}
--- /dev/null
+# agghoo
+
+R package for model selection based on aggregation.
+Alternative to standard cross-validation.
+
+## Install the package
+
+From GitHub: `devtools::install_github("yagu0/agghoo")`
+
+Locally, in a terminal: `R CMD INSTALL .`
+
+## Use the package
+
+ library(agghoo)
+ ?agghoo
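+
+A minimal example (a sketch using default models and parameters, on iris):
+
+    a <- agghoo(iris[,-5], iris[,5])
+    a$fit()
+    predictions <- a$predict(iris[,-5])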
--- /dev/null
+Support missing values (cf. mlbench::Ozone dataset)
+Method for mixed data? (currently only tree)
--- /dev/null
+library(agghoo)
+
+data(iris) #already there
+library(mlbench)
+data(PimaIndiansDiabetes)
+
+# Run only agghoo on iris dataset (split into train/test, etc).
+# Default parameters: see ?agghoo and ?AgghooCV
+compareTo(iris[,-5], iris[,5], agghoo_run)
+
+# Run both agghoo and standard CV, specifying some parameters.
+compareTo(iris[,-5], iris[,5], list(agghoo_run, standardCV_run), gmodel="tree")
+compareTo(iris[,-5], iris[,5], list(agghoo_run, standardCV_run),
+ gmodel="knn", params=c(3, 7, 13, 17, 23, 31),
+  CV = list(type="vfold", V=5, shuffle=TRUE))
+
+# Run both agghoo and standard CV, averaging errors over N=10 runs
+# (possible for a single method but wouldn't make much sense...).
+compareMulti(PimaIndiansDiabetes[,-9], PimaIndiansDiabetes[,9],
+  list(agghoo_run, standardCV_run), N=10, gmodel="tree")
+
+# Compare several values of V
+compareRange(PimaIndiansDiabetes[,-9], PimaIndiansDiabetes[,9],
+ list(agghoo_run, standardCV_run), N=10, V_range=c(10, 20, 30))
+
+# For example, to use the average of squared differences:
+# Default is "mean(abs(y1 - y2))".
+loss2 <- function(y1, y2) mean((y1 - y2)^2)
+
+# In regression on artificial datasets (TODO: real data?)
+data <- mlbench.twonorm(300, 3)$x
+target <- rowSums(data)
+compareMulti(data, target, list(agghoo_run, standardCV_run),
+ N=10, gmodel="tree", params=c(1, 3, 5, 7, 9), loss=loss2,
+ CV = list(type="MC", V=12, test_size=0.3))
+
+compareMulti(data, target, list(agghoo_run, standardCV_run),
+  N=10, floss=loss2, CV = list(type="vfold", V=10, shuffle=FALSE))
+
+# Random tests to check that method doesn't fail in 1D case
+M <- matrix(rnorm(200), ncol=2)
+compareTo(as.matrix(M[,-2]), M[,2], list(agghoo_run, standardCV_run), gmodel="knn")
+compareTo(as.matrix(M[,-2]), M[,2], list(agghoo_run, standardCV_run), gmodel="tree")
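+
+# A custom generic model can also be supplied: gmodel must map
+# (dataHO, targetHO, param) to a prediction function X --> y, and 'params'
+# must then be given explicitly. Hypothetical sketch, reusing the regression
+# pair (data, target) above; here param = number of columns given to lm().
+gmodel_lm <- function(dataHO, targetHO, param) {
+  df <- data.frame(dataHO[, 1:param, drop=FALSE], target=targetHO)
+  model <- lm(target ~ ., data=df)
+  function(X) predict(model, data.frame(X[, 1:param, drop=FALSE]))
+}
+compareTo(data, target, list(agghoo_run, standardCV_run),
+          gmodel=gmodel_lm, params=as.list(1:3))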
--- /dev/null
+Some unit tests?
--- /dev/null
+* using log directory ‘/home/auder/repos/agghoo/agghoo.Rcheck’
+* using R version 4.2.1 (2022-06-23)
+* using platform: x86_64-pc-linux-gnu (64-bit)
+* using session charset: UTF-8
+* checking for file ‘agghoo/DESCRIPTION’ ... OK
+* this is package ‘agghoo’ version ‘0.1-0’
+* checking package namespace information ... OK
+* checking package dependencies ... OK
+* checking if this is a source package ... OK
+* checking if there is a namespace ... OK
+* checking for executable files ... OK
+* checking for hidden files and directories ... OK
+* checking for portable file names ... OK
+* checking for sufficient/correct file permissions ... OK
+* checking whether package ‘agghoo’ can be installed ... OK
+* checking installed package size ... OK
+* checking package directory ... OK
+* checking DESCRIPTION meta-information ... OK
+* checking top-level files ... OK
+* checking for left-over files ... OK
+* checking index information ... OK
+* checking package subdirectories ... OK
+* checking R files for non-ASCII characters ... OK
+* checking R files for syntax errors ... OK
+* checking whether the package can be loaded ... OK
+* checking whether the package can be loaded with stated dependencies ... OK
+* checking whether the package can be unloaded cleanly ... OK
+* checking whether the namespace can be loaded with stated dependencies ... OK
+* checking whether the namespace can be unloaded cleanly ... OK
+* checking loading without being on the library search path ... OK
+* checking dependencies in R code ... OK
+* checking S3 generic/method consistency ... OK
+* checking replacement functions ... OK
+* checking foreign function calls ... OK
+* checking R code for possible problems ... NOTE
+compareMulti: no visible binding for global variable ‘parallel’
+Undefined global functions or variables:
+ parallel
+* checking for missing documentation entries ... WARNING
+Undocumented code objects:
+ ‘AgghooCV’ ‘Model’ ‘agghoo’ ‘compareMulti’ ‘compareRange’ ‘compareTo’
+All user-level objects in a package should have documentation entries.
+See chapter ‘Writing R documentation files’ in the ‘Writing R
+Extensions’ manual.
+* checking examples ... NONE
+* checking PDF version of manual ... WARNING
+LaTeX errors when creating PDF version.
+This typically indicates Rd problems.
+* checking PDF version of manual without index ... ERROR
+Re-running with no redirection of stdout/stderr.
+* DONE
+Status: 1 ERROR, 2 WARNINGs, 1 NOTE
--- /dev/null
+* installing *source* package ‘agghoo’ ...
+** using staged installation
+** R
+** byte-compile and prepare package for lazy loading
+** help
+No man pages found in package ‘agghoo’
+*** installing help indices
+** building package indices
+** testing if installed package can be loaded from temporary location
+** testing if installed package can be loaded from final location
+** testing if installed package keeps a record of temporary installation path
+* DONE (agghoo)
--- /dev/null
+Hmm ... looks like a package
+Converting parsed Rd's to LaTeX
+Creating pdf output from LaTeX ...
+warning: kpathsea: configuration file texmf.cnf not found in these directories: /usr/bin:/usr/bin/share/texmf-local/web2c:/usr/bin/share/texmf-dist/web2c:/usr/bin/share/texmf/web2c:/usr/bin/texmf-local/web2c:/usr/bin/texmf-dist/web2c:/usr/bin/texmf/web2c:/usr:/usr/share/texmf-local/web2c:/usr/share/texmf-dist/web2c:/usr/share/texmf/web2c:/usr/texmf-local/web2c:/usr/texmf-dist/web2c:/usr/texmf/web2c://texmf-local/web2c:/://share/texmf-local/web2c://share/texmf-dist/web2c://share/texmf/web2c://texmf-local/web2c://texmf-dist/web2c://texmf/web2c.
+This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex)
+
+kpathsea: Running mktexfmt pdflatex.fmt
+mktexfmt: No such file or directory
+I can't find the format file `pdflatex.fmt'!
+Warning in file(con, "r") :
+ cannot open file 'Rd2.log': No such file or directory
+Error in file(con, "r") : cannot open the connection
+warning: kpathsea: configuration file texmf.cnf not found in these directories: /usr/bin:/usr/bin/share/texmf-local/web2c:/usr/bin/share/texmf-dist/web2c:/usr/bin/share/texmf/web2c:/usr/bin/texmf-local/web2c:/usr/bin/texmf-dist/web2c:/usr/bin/texmf/web2c:/usr:/usr/share/texmf-local/web2c:/usr/share/texmf-dist/web2c:/usr/share/texmf/web2c:/usr/texmf-local/web2c:/usr/texmf-dist/web2c:/usr/texmf/web2c://texmf-local/web2c:/://share/texmf-local/web2c://share/texmf-dist/web2c://share/texmf/web2c://texmf-local/web2c://texmf-dist/web2c://texmf/web2c.
+This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex)
+
+kpathsea: Running mktexfmt pdflatex.fmt
+mktexfmt: No such file or directory
+I can't find the format file `pdflatex.fmt'!
+Warning in file(con, "r") :
+ cannot open file 'Rd2.log': No such file or directory
+Error in file(con, "r") : cannot open the connection
+Error in running tools::texi2pdf()
+You may want to clean up by 'rm -Rf /tmp/RtmpIZpCnq/Rd2pdf1084ce20004'
--- /dev/null
+\nonstopmode{}
+\documentclass[letterpaper]{book}
+\usepackage[times,hyper]{Rd}
+\usepackage{makeidx}
+\usepackage[utf8]{inputenc} % @SET ENCODING@
+% \usepackage{graphicx} % @USE GRAPHICX@
+\makeindex{}
+\begin{document}
+\chapter*{}
+\begin{center}
+{\textbf{\huge Package `agghoo'}}
+\par\bigskip{\large \today}
+\end{center}
+\ifthenelse{\boolean{Rd@use@hyper}}{\hypersetup{pdftitle = {agghoo: Aggregated Hold-Out Cross Validation}}}{}
+\begin{description}
+\raggedright{}
+\item[Title]\AsIs{Aggregated Hold-Out Cross Validation}
+\item[Date]\AsIs{2022-08-30}
+\item[Version]\AsIs{0.1-0}
+\item[Description]\AsIs{The 'agghoo' procedure is an alternative to usual cross-validation.
+Instead of choosing the best model trained on V subsamples, it determines
+a winner model for each subsample, and then aggregates the V outputs.
+For the details, see ``Aggregated hold-out'' by Guillaume Maillard,
+Sylvain Arlot, Matthieu Lerasle (2021) <}\Rhref{https://arxiv.org/abs/1909.04890}{arXiv:1909.04890}\AsIs{>
+published in Journal of Machine Learning Research 22(20):1--55.}
+\item[Author]\AsIs{Sylvain Arlot }\email{sylvain.arlot@universite-paris-saclay.fr}\AsIs{ [cph,ctb],
+Benjamin Auder }\email{benjamin.auder@universite-paris-saclay.fr}\AsIs{ [aut,cre,cph],
+Melina Gallopin }\email{melina.gallopin@universite-paris-saclay.fr}\AsIs{ [cph,ctb],
+Matthieu Lerasle }\email{matthieu.lerasle@universite-paris-saclay.fr}\AsIs{ [cph,ctb],
+Guillaume Maillard }\email{guillaume.maillard@uni.lu}\AsIs{ [cph,ctb]}
+\item[Maintainer]\AsIs{Benjamin Auder }\email{benjamin.auder@universite-paris-saclay.fr}\AsIs{}
+\item[Depends]\AsIs{R (>= 3.5.0)}
+\item[Imports]\AsIs{class, parallel, R6, rpart, FNN}
+\item[Suggests]\AsIs{roxygen2}
+\item[URL]\AsIs{}\url{https://git.auder.net/?p=agghoo.git}\AsIs{}
+\item[License]\AsIs{MIT + file LICENSE}
+\item[RoxygenNote]\AsIs{7.2.1}
+\item[Collate]\AsIs{'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R'
+'checks.R' 'utils.R' 'A_NAMESPACE.R'}
+\item[NeedsCompilation]\AsIs{no}
+\end{description}
+\Rdcontents{\R{} topics documented:}
+\printindex{}
+\end{document}
--- /dev/null
+Package: agghoo
+Title: Aggregated Hold-Out Cross Validation
+Date: 2022-08-30
+Version: 0.1-0
+Description: The 'agghoo' procedure is an alternative to usual cross-validation.
+ Instead of choosing the best model trained on V subsamples, it determines
+  a winner model for each subsample, and then aggregates the V outputs.
+ For the details, see "Aggregated hold-out" by Guillaume Maillard,
+ Sylvain Arlot, Matthieu Lerasle (2021) <arXiv:1909.04890>
+ published in Journal of Machine Learning Research 22(20):1--55.
+Author: Sylvain Arlot <sylvain.arlot@universite-paris-saclay.fr> [cph,ctb],
+ Benjamin Auder <benjamin.auder@universite-paris-saclay.fr> [aut,cre,cph],
+ Melina Gallopin <melina.gallopin@universite-paris-saclay.fr> [cph,ctb],
+ Matthieu Lerasle <matthieu.lerasle@universite-paris-saclay.fr> [cph,ctb],
+ Guillaume Maillard <guillaume.maillard@uni.lu> [cph,ctb]
+Maintainer: Benjamin Auder <benjamin.auder@universite-paris-saclay.fr>
+Depends: R (>= 3.5.0)
+Imports: class, parallel, R6, rpart, FNN
+Suggests: roxygen2
+URL: https://git.auder.net/?p=agghoo.git
+License: MIT + file LICENSE
+RoxygenNote: 7.2.1
+Collate: 'compareTo.R' 'agghoo.R' 'R6_AgghooCV.R' 'R6_Model.R'
+ 'checks.R' 'utils.R' 'A_NAMESPACE.R'
+NeedsCompilation: no
+Packaged: 2022-09-09 15:45:56 UTC; auder
+Built: R 4.2.1; ; 2022-09-09 15:46:05 UTC; unix
--- /dev/null
+YEAR: 2021-2022
+COPYRIGHT HOLDER: Sylvain Arlot, Benjamin Auder, Melina Gallopin, Matthieu Lerasle, Guillaume Maillard
--- /dev/null
+# Generated by roxygen2: do not edit by hand
+
+export(AgghooCV)
+export(Model)
+export(agghoo)
+export(compareMulti)
+export(compareRange)
+export(compareTo)
+importFrom(FNN,knn.reg)
+importFrom(R6,R6Class)
+importFrom(class,knn)
+importFrom(rpart,rpart)
+importFrom(stats,ppr)
--- /dev/null
+# File share/R/nspackloader.R
+# Part of the R package, https://www.R-project.org
+#
+# Copyright (C) 1995-2012 The R Core Team
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# A copy of the GNU General Public License is available at
+# https://www.r-project.org/Licenses/
+
+local({
+ info <- loadingNamespaceInfo()
+ pkg <- info$pkgname
+ ns <- .getNamespace(as.name(pkg))
+ if (is.null(ns))
+ stop("cannot find namespace environment for ", pkg, domain = NA);
+ dbbase <- file.path(info$libname, pkg, "R", pkg)
+ lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.")
+})
--- /dev/null
+<!DOCTYPE html>
+<html>
+<head><title>R: Aggregated Hold-Out Cross Validation</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+<link rel="stylesheet" type="text/css" href="R.css" />
+</head><body><div class="container">
+<h1> Aggregated Hold-Out Cross Validation
+<img class="toplogo" src="../../../doc/html/Rlogo.svg" alt="[R logo]" />
+</h1>
+<hr/>
+<div style="text-align: center;">
+<a href="../../../doc/html/packages.html"><img class="arrow" src="../../../doc/html/left.jpg" alt="[Up]" /></a>
+<a href="../../../doc/html/index.html"><img class="arrow" src="../../../doc/html/up.jpg" alt="[Top]" /></a>
+</div><h2>Documentation for package ‘agghoo’ version 0.1-0</h2>
+
+<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li>
+</ul>
+
+<h2>Help Pages</h2>
+
+
+There are no help pages in this package
+</div></body></html>
--- /dev/null
+@media screen {
+ .container {
+ padding-right: 10px;
+ padding-left: 10px;
+ margin-right: auto;
+ margin-left: auto;
+ max-width: 900px;
+ }
+}
+
+.rimage img { /* from knitr - for examples and demos */
+ width: 96%;
+ margin-left: 2%;
+}
+
+.katex { font-size: 1.1em; }
+
+code {
+ color: inherit;
+ background: inherit;
+}
+
+body {
+ line-height: 1.4;
+ background: white;
+ color: black;
+}
+
+a:link {
+ background: white;
+ color: blue;
+}
+
+a:visited {
+ background: white;
+ color: rgb(50%, 0%, 50%);
+}
+
+h1 {
+ background: white;
+ color: rgb(55%, 55%, 55%);
+ font-family: monospace;
+ font-size: 1.4em; /* x-large; */
+ text-align: center;
+}
+
+h2 {
+ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-size: 1.2em; /* large; */
+ text-align: center;
+}
+
+h3 {
+ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-size: 1.2em; /* large; */
+}
+
+h4 {
+ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-style: italic;
+ font-size: 1.2em; /* large; */
+}
+
+h5 {
+ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+}
+
+h6 {
+ background: white;
+ color: rgb(40%, 40%, 40%);
+ font-family: monospace;
+ font-style: italic;
+}
+
+img.toplogo {
+ width: 4em;
+ vertical-align: middle;
+}
+
+img.arrow {
+ width: 30px;
+ height: 30px;
+ border: 0;
+}
+
+span.acronym {
+ font-size: small;
+}
+
+span.env {
+ font-family: monospace;
+}
+
+span.file {
+ font-family: monospace;
+}
+
+span.option{
+ font-family: monospace;
+}
+
+span.pkg {
+ font-weight: bold;
+}
+
+span.samp{
+ font-family: monospace;
+}
+
+div.vignettes a:hover {
+ background: rgb(85%, 85%, 85%);
+}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6_AgghooCV.R
-\name{AgghooCV}
-\alias{AgghooCV}
-\title{R6 class with agghoo functions fit() and predict().}
-\description{
-Class encapsulating the methods to run to obtain the best predictor
-from the list of models (see 'Model' class).
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-new}{\code{AgghooCV$new()}}
-\item \href{#method-fit}{\code{AgghooCV$fit()}}
-\item \href{#method-predict}{\code{AgghooCV$predict()}}
-\item \href{#method-getParams}{\code{AgghooCV$getParams()}}
-\item \href{#method-clone}{\code{AgghooCV$clone()}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-new"></a>}}
-\if{latex}{\out{\hypertarget{method-new}{}}}
-\subsection{Method \code{new()}}{
-Create a new AgghooCV object.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{AgghooCV$new(data, target, task, gmodel, loss)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{data}}{Matrix or data.frame}
-
-\item{\code{target}}{Vector of targets (generally numeric or factor)}
-
-\item{\code{task}}{"regression" or "classification".
-Default: classification if target not numeric.}
-
-\item{\code{gmodel}}{Generic model returning a predictive function
-Default: tree if mixed data, knn/ppr otherwise.}
-
-\item{\code{loss}}{Function assessing the error of a prediction
-Default: error rate or mean(abs(error)).}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-fit"></a>}}
-\if{latex}{\out{\hypertarget{method-fit}{}}}
-\subsection{Method \code{fit()}}{
-Fit an agghoo model.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{AgghooCV$fit(CV = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{CV}}{List describing cross-validation to run. Slots: \cr
- - type: 'vfold' or 'MC' for Monte-Carlo (default: MC) \cr
- - V: number of runs (default: 10) \cr
- - test_size: percentage of data in the test dataset, for MC
- (irrelevant for V-fold). Default: 0.2. \cr
- - shuffle: wether or not to shuffle data before V-fold.
- Irrelevant for Monte-Carlo; default: TRUE \cr
-Default (if NULL): type="MC", V=10, test_size=0.2}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-predict"></a>}}
-\if{latex}{\out{\hypertarget{method-predict}{}}}
-\subsection{Method \code{predict()}}{
-Predict an agghoo model (after calling fit())
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{AgghooCV$predict(X)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{X}}{Matrix or data.frame to predict}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-getParams"></a>}}
-\if{latex}{\out{\hypertarget{method-getParams}{}}}
-\subsection{Method \code{getParams()}}{
-Return the list of V best parameters (after calling fit())
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{AgghooCV$getParams()}\if{html}{\out{</div>}}
-}
-
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{AgghooCV$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{CVvoting_core}
-\alias{CVvoting_core}
-\title{CVvoting_core}
-\usage{
-CVvoting_core(data, target, task, gmodel, params, loss, CV)
-}
-\description{
-"voting" cross-validation method, added here as an example.
-Parameters are described in ?agghoo and ?AgghooCV
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{CVvoting_run}
-\alias{CVvoting_run}
-\title{CVvoting_run}
-\usage{
-CVvoting_run(dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...)
-}
-\description{
-Run and eval the voting cross-validation procedure.
-Parameters are rather explicit except "floss", which corresponds to the
-"final" loss function, applied to compute the error on testing dataset.
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/R6_Model.R
-\name{Model}
-\alias{Model}
-\title{R6 class representing a (generic) model.}
-\description{
-"Model" class, containing a (generic) learning function, which from
-data + target [+ params] returns a prediction function X --> y.
-Parameters for cross-validation are either provided or estimated.
-Model family can be chosen among "tree", "ppr" and "knn" for now.
-}
-\section{Public fields}{
-\if{html}{\out{<div class="r6-fields">}}
-\describe{
-\item{\code{nmodels}}{Number of parameters (= number of [predictive] models)}
-}
-\if{html}{\out{</div>}}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-new}{\code{Model$new()}}
-\item \href{#method-get}{\code{Model$get()}}
-\item \href{#method-getParam}{\code{Model$getParam()}}
-\item \href{#method-clone}{\code{Model$clone()}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-new"></a>}}
-\if{latex}{\out{\hypertarget{method-new}{}}}
-\subsection{Method \code{new()}}{
-Create a new generic model.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Model$new(data, target, task, gmodel = NULL, params = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{data}}{Matrix or data.frame}
-
-\item{\code{target}}{Vector of targets (generally numeric or factor)}
-
-\item{\code{task}}{"regression" or "classification"}
-
-\item{\code{gmodel}}{Generic model returning a predictive function; chosen
-automatically given data and target nature if not provided.}
-
-\item{\code{params}}{List of parameters for cross-validation (each defining a model)}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-get"></a>}}
-\if{latex}{\out{\hypertarget{method-get}{}}}
-\subsection{Method \code{get()}}{
-Returns the model at index "index", trained on dataHO/targetHO.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Model$get(dataHO, targetHO, index)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{dataHO}}{Matrix or data.frame}
-
-\item{\code{targetHO}}{Vector of targets (generally numeric or factor)}
-
-\item{\code{index}}{Index of the model in 1...nmodels}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-getParam"></a>}}
-\if{latex}{\out{\hypertarget{method-getParam}{}}}
-\subsection{Method \code{getParam()}}{
-Returns the parameter at index "index".
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Model$getParam(index)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{index}}{Index of the model in 1...nmodels}
-}
-\if{html}{\out{</div>}}
-}
-}
-\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{Model$clone(deep = FALSE)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{</div>}}
-}
-}
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/agghoo.R
-\name{agghoo}
-\alias{agghoo}
-\title{agghoo}
-\usage{
-agghoo(data, target, task = NULL, gmodel = NULL, params = NULL, loss = NULL)
-}
-\arguments{
-\item{data}{Data frame or matrix containing the data in lines.}
-
-\item{target}{The target values to predict. Generally a vector,
-but possibly a matrix in the case of "soft classification".}
-
-\item{task}{"classification" or "regression". Default:
-regression if target is numerical, classification otherwise.}
-
-\item{gmodel}{A "generic model", which is a function returning a predict
-function (taking X as only argument) from the tuple
-(dataHO, targetHO, param), where 'HO' stands for 'Hold-Out',
-referring to cross-validation. Cross-validation is run on an array
-of 'param's. See params argument. Default: see R6::Model.}
-
-\item{params}{A list of parameters. Often, one list cell is just a
-numerical value, but in general it could be of any type.
-Default: see R6::Model.}
-
-\item{loss}{A function assessing the error of a prediction.
-Arguments are y1 and y2 (comparing a prediction to known values).
-loss(y1, y2) --> real number (error). Default: see R6::AgghooCV.}
-}
-\value{
-An R6::AgghooCV object o. Then, call o$fit() and finally o$predict(newData)
-}
-\description{
-Run the (core) agghoo procedure.
-Arguments specify the list of models, their parameters and the
-cross-validation settings, among others.
-}
-\examples{
-# Regression:
-a_reg <- agghoo(iris[,-c(2,5)], iris[,2])
-a_reg$fit()
-pr <- a_reg$predict(iris[,-c(2,5)] + rnorm(450, sd=0.1))
-# Classification
-a_cla <- agghoo(iris[,-5], iris[,5])
-a_cla$fit()
-pc <- a_cla$predict(iris[,-5] + rnorm(600, sd=0.1))
-
-}
-\references{
-Guillaume Maillard, Sylvain Arlot, Matthieu Lerasle. "Aggregated hold-out".
-Journal of Machine Learning Research 22(20):1--55, 2021.
-}
-\seealso{
-Function \code{\link{compareTo}}
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{agghoo_run}
-\alias{agghoo_run}
-\title{agghoo_run}
-\usage{
-agghoo_run(dataTrain, dataTest, targetTrain, targetTest, floss, verbose, ...)
-}
-\description{
-Run and eval the agghoo procedure.
-Parameters are rather explicit except "floss", which corresponds to the
-"final" loss function, applied to compute the error on testing dataset.
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{compareMulti}
-\alias{compareMulti}
-\title{compareMulti}
-\usage{
-compareMulti(
- data,
- target,
- method_s,
- N = 100,
- nc = NA,
- floss = NULL,
- verbose = TRUE,
- ...
-)
-}
-\arguments{
-\item{data}{Data matrix or data.frame}
-
-\item{target}{Target vector (generally)}
-
-\item{method_s}{Either a single function, or a list
-(examples: agghoo_run, standardCV_run)}
-
-\item{N}{Number of calls to method(s)}
-
-\item{nc}{Number of cores. Set to parallel::detectCores() if undefined.
-Set it to any value <=1 to say "no parallelism".}
-
-\item{floss}{Loss function to compute the error on testing dataset.}
-
-\item{verbose}{TRUE to print task numbers and "Errors:" in the end.}
-
-\item{...}{arguments passed to method_s function(s)}
-}
-\description{
-Run compareTo N times in parallel.
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{compareRange}
-\alias{compareRange}
-\title{compareRange}
-\usage{
-compareRange(
- data,
- target,
- method_s,
- N = 100,
- nc = NA,
- floss = NULL,
- V_range = c(10, 15, 20),
- ...
-)
-}
-\arguments{
-\item{data}{Data matrix or data.frame}
-
-\item{target}{Target vector (generally)}
-
-\item{method_s}{Either a single function, or a list
-(examples: agghoo_run, standardCV_run)}
-
-\item{N}{Number of calls to method(s)}
-
-\item{nc}{Number of cores. Set to parallel::detectCores() if undefined.
-Set it to any value <=1 to say "no parallelism".}
-
-\item{floss}{Loss function to compute the error on testing dataset.}
-
-\item{V_range}{Values of V to be tested.}
-
-\item{...}{arguments passed to method_s function(s)}
-}
-\description{
-Run compareMulti on several values of the parameter V.
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{compareTo}
-\alias{compareTo}
-\title{compareTo}
-\usage{
-compareTo(
- data,
- target,
- method_s,
- rseed = -1,
- floss = NULL,
- verbose = TRUE,
- ...
-)
-}
-\arguments{
-\item{data}{Data matrix or data.frame}
-
-\item{target}{Target vector (generally)}
-
-\item{method_s}{Either a single function, or a list
-(examples: agghoo_run, standardCV_run)}
-
-\item{rseed}{Seed of the random generator (-1 means "random seed")}
-
-\item{floss}{Loss function to compute the error on testing dataset.}
-
-\item{verbose}{TRUE to request methods to be verbose.}
-
-\item{...}{arguments passed to method_s function(s)}
-}
-\description{
-Compare a list of learning methods (or run only one), on data/target.
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{standardCV_core}
-\alias{standardCV_core}
-\title{standardCV_core}
-\usage{
-standardCV_core(data, target, task, gmodel, params, loss, CV)
-}
-\description{
-Cross-validation method, added here as an example.
-Parameters are described in ?agghoo and ?AgghooCV
-}
+++ /dev/null
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compareTo.R
-\name{standardCV_run}
-\alias{standardCV_run}
-\title{standardCV_run}
-\usage{
-standardCV_run(
- dataTrain,
- dataTest,
- targetTrain,
- targetTest,
- floss,
- verbose,
- ...
-)
-}
-\description{
-Run and eval the standard cross-validation procedure.
-Parameters are rather explicit except "floss", which corresponds to the
-"final" loss function, applied to compute the error on testing dataset.
-}