X-Git-Url: https://git.auder.net/?p=agghoo.git;a=blobdiff_plain;f=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FR6_Model.R;fp=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FR6_Model.R;h=0000000000000000000000000000000000000000;hp=d48825eb9f8d1a3f9c872a2e8757c12e34ca69ad;hb=16906f6e8c432b811ddf99da1b18a2a357a75235;hpb=97f16440280a40a49c4898a75942e374880bfca3 diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R deleted file mode 100644 index d48825e..0000000 --- a/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R +++ /dev/null @@ -1,157 +0,0 @@ -#' @title R6 class representing a (generic) model. -#' -#' @description -#' "Model" class, containing a (generic) learning function, which from -#' data + target [+ params] returns a prediction function X --> y. -#' Parameters for cross-validation are either provided or estimated. -#' Model family can be chosen among "tree", "ppr" and "knn" for now. -#' -#' @importFrom FNN knn.reg -#' @importFrom class knn -#' @importFrom stats ppr -#' @importFrom rpart rpart -#' -#' @export -Model <- R6::R6Class("Model", - public = list( - #' @field nmodels Number of parameters (= number of [predictive] models) - nmodels = NA, - #' @description Create a new generic model. - #' @param data Matrix or data.frame - #' @param target Vector of targets (generally numeric or factor) - #' @param task "regression" or "classification" - #' @param gmodel Generic model returning a predictive function; chosen - #' automatically given data and target nature if not provided. - #' @param params List of parameters for cross-validation (each defining a model) - initialize = function(data, target, task, gmodel = NULL, params = NULL) { - if (is.null(gmodel)) { - # (Generic) model not provided - all_numeric <- is.numeric(as.matrix(data)) - if (!all_numeric) - # At least one non-numeric column: use trees - gmodel = "tree" - else - # Numerical data - gmodel = ifelse(task == "regression", "ppr", "knn") - } - if (is.null(params)) - # Here, gmodel is a string (= its family), - # because a custom model must be given with its parameters. - params <- as.list(private$getParams(gmodel, data, target, task)) - private$params <- params - if (is.character(gmodel)) - gmodel <- private$getGmodel(gmodel, task) - private$gmodel <- gmodel - self$nmodels <- length(private$params) - }, - #' @description - #' Returns the model at index "index", trained on dataHO/targetHO. - #' @param dataHO Matrix or data.frame - #' @param targetHO Vector of targets (generally numeric or factor) - #' @param index Index of the model in 1...nmodels - get = function(dataHO, targetHO, index) { - private$gmodel(dataHO, targetHO, private$params[[index]]) - }, - #' @description - #' Returns the parameter at index "index". - #' @param index Index of the model in 1...nmodels - getParam = function(index) { - private$params[[index]] - } - ), - private = list( - # No need to expose model or parameters list - gmodel = NULL, - params = NULL, - # Main function: given a family, return a generic model, which in turn - # will output a predictive model from data + target + params. - getGmodel = function(family, task) { - if (family == "tree") { - function(dataHO, targetHO, param) { - base::require(rpart) - method <- ifelse(task == "classification", "class", "anova") - if (is.null(colnames(dataHO))) - colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) - df <- data.frame(cbind(dataHO, target=targetHO)) - model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param)) - if (task == "regression") - type <- "vector" - else { - if (is.null(dim(targetHO))) - type <- "class" - else - type <- "prob" - } - function(X) { - if (is.null(colnames(X))) - colnames(X) <- paste0("V", 1:ncol(X)) - predict(model, as.data.frame(X), type=type) - } - } - } - else if (family == "ppr") { - function(dataHO, targetHO, param) { - model <- stats::ppr(dataHO, targetHO, nterms=param) - function(X) predict(model, X) - } - } - else if (family == "knn") { - if (task == "classification") { - function(dataHO, targetHO, param) { - base::require(class) - function(X) class::knn(dataHO, X, cl=targetHO, k=param) - } - } - else { - function(dataHO, targetHO, param) { - base::require(FNN) - function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred - } - } - } - }, - # Return a default list of parameters, given a gmodel family - getParams = function(family, data, target, task) { - if (family == "tree") { - # Run rpart once to obtain a CV grid for parameter cp - base::require(rpart) - df <- data.frame(cbind(data, target=target)) - ctrl <- list( - cp = 0, - minsplit = 2, - minbucket = 1, - xval = 0) - method <- ifelse(task == "classification", "class", "anova") - r <- rpart(target ~ ., df, method=method, control=ctrl) - cps <- r$cptable[-1,1] - if (length(cps) <= 1) - stop("No cross-validation possible: select another model") - if (length(cps) <= 11) - return (cps) - step <- (length(cps) - 1) / 10 - cps[unique(round(seq(1, length(cps), step)))] - } - else if (family == "ppr") - # This is nterms in ppr() function - 1:10 - else if (family == "knn") { - n <- nrow(data) - # Choose ~10 NN values - K <- length(unique(target)) - if (n <= 10) - return (1:(n-1)) - sqrt_n <- sqrt(n) - step <- (2*sqrt_n - 1) / 10 - grid <- unique(round(seq(1, 2*sqrt_n, step))) - if (K == 2) { - # Common binary classification case: odd number of neighbors - for (i in 2:11) { - if (grid[i] %% 2 == 0) - grid[i] <- grid[i] + 1 #arbitrary choice - } - } - grid - } - } - ) -)