X-Git-Url: https://git.auder.net/?p=agghoo.git;a=blobdiff_plain;f=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FR6_Model.R;fp=agghoo.Rcheck%2F00_pkg_src%2Fagghoo%2FR%2FR6_Model.R;h=d48825eb9f8d1a3f9c872a2e8757c12e34ca69ad;hp=0000000000000000000000000000000000000000;hb=97f16440280a40a49c4898a75942e374880bfca3;hpb=3b8affec63125c3816a3d15f0f49776dc14867a2 diff --git a/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R new file mode 100644 index 0000000..d48825e --- /dev/null +++ b/agghoo.Rcheck/00_pkg_src/agghoo/R/R6_Model.R @@ -0,0 +1,157 @@ +#' @title R6 class representing a (generic) model. +#' +#' @description +#' "Model" class, containing a (generic) learning function, which from +#' data + target [+ params] returns a prediction function X --> y. +#' Parameters for cross-validation are either provided or estimated. +#' Model family can be chosen among "tree", "ppr" and "knn" for now. +#' +#' @importFrom FNN knn.reg +#' @importFrom class knn +#' @importFrom stats ppr +#' @importFrom rpart rpart +#' +#' @export +Model <- R6::R6Class("Model", + public = list( + #' @field nmodels Number of parameters (= number of [predictive] models) + nmodels = NA, + #' @description Create a new generic model. + #' @param data Matrix or data.frame + #' @param target Vector of targets (generally numeric or factor) + #' @param task "regression" or "classification" + #' @param gmodel Generic model returning a predictive function; chosen + #' automatically given data and target nature if not provided. + #' @param params List of parameters for cross-validation (each defining a model) + initialize = function(data, target, task, gmodel = NULL, params = NULL) { + if (is.null(gmodel)) { + # (Generic) model not provided + all_numeric <- is.numeric(as.matrix(data)) + if (!all_numeric) + # At least one non-numeric column: use trees + gmodel = "tree" + else + # Numerical data + gmodel = ifelse(task == "regression", "ppr", "knn") + } + if (is.null(params)) + # Here, gmodel is a string (= its family), + # because a custom model must be given with its parameters. + params <- as.list(private$getParams(gmodel, data, target, task)) + private$params <- params + if (is.character(gmodel)) + gmodel <- private$getGmodel(gmodel, task) + private$gmodel <- gmodel + self$nmodels <- length(private$params) + }, + #' @description + #' Returns the model at index "index", trained on dataHO/targetHO. + #' @param dataHO Matrix or data.frame + #' @param targetHO Vector of targets (generally numeric or factor) + #' @param index Index of the model in 1...nmodels + get = function(dataHO, targetHO, index) { + private$gmodel(dataHO, targetHO, private$params[[index]]) + }, + #' @description + #' Returns the parameter at index "index". + #' @param index Index of the model in 1...nmodels + getParam = function(index) { + private$params[[index]] + } + ), + private = list( + # No need to expose model or parameters list + gmodel = NULL, + params = NULL, + # Main function: given a family, return a generic model, which in turn + # will output a predictive model from data + target + params. + getGmodel = function(family, task) { + if (family == "tree") { + function(dataHO, targetHO, param) { + base::require(rpart) + method <- ifelse(task == "classification", "class", "anova") + if (is.null(colnames(dataHO))) + colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) + df <- data.frame(cbind(dataHO, target=targetHO)) + model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param)) + if (task == "regression") + type <- "vector" + else { + if (is.null(dim(targetHO))) + type <- "class" + else + type <- "prob" + } + function(X) { + if (is.null(colnames(X))) + colnames(X) <- paste0("V", 1:ncol(X)) + predict(model, as.data.frame(X), type=type) + } + } + } + else if (family == "ppr") { + function(dataHO, targetHO, param) { + model <- stats::ppr(dataHO, targetHO, nterms=param) + function(X) predict(model, X) + } + } + else if (family == "knn") { + if (task == "classification") { + function(dataHO, targetHO, param) { + base::require(class) + function(X) class::knn(dataHO, X, cl=targetHO, k=param) + } + } + else { + function(dataHO, targetHO, param) { + base::require(FNN) + function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred + } + } + } + }, + # Return a default list of parameters, given a gmodel family + getParams = function(family, data, target, task) { + if (family == "tree") { + # Run rpart once to obtain a CV grid for parameter cp + base::require(rpart) + df <- data.frame(cbind(data, target=target)) + ctrl <- list( + cp = 0, + minsplit = 2, + minbucket = 1, + xval = 0) + method <- ifelse(task == "classification", "class", "anova") + r <- rpart(target ~ ., df, method=method, control=ctrl) + cps <- r$cptable[-1,1] + if (length(cps) <= 1) + stop("No cross-validation possible: select another model") + if (length(cps) <= 11) + return (cps) + step <- (length(cps) - 1) / 10 + cps[unique(round(seq(1, length(cps), step)))] + } + else if (family == "ppr") + # This is nterms in ppr() function + 1:10 + else if (family == "knn") { + n <- nrow(data) + # Choose ~10 NN values + K <- length(unique(target)) + if (n <= 10) + return (1:(n-1)) + sqrt_n <- sqrt(n) + step <- (2*sqrt_n - 1) / 10 + grid <- unique(round(seq(1, 2*sqrt_n, step))) + if (K == 2) { + # Common binary classification case: odd number of neighbors + for (i in 2:11) { + if (grid[i] %% 2 == 0) + grid[i] <- grid[i] + 1 #arbitrary choice + } + } + grid + } + } + ) +)