X-Git-Url: https://git.auder.net/img/rock_paper_scissors_lizard_spock.gif?a=blobdiff_plain;f=R%2FR6_Model.R;h=96f892d4b8a3e9ec1c0e410f34b03652d907b5f9;hb=7b5193cdf5eb7041710c52368764feeacbb36a7c;hp=9d7fc70bf3b10e483873b0d1e2b7694978f2a128;hpb=c5946158cae8f7b9400f107c533e745f835eb20f;p=agghoo.git diff --git a/R/R6_Model.R b/R/R6_Model.R index 9d7fc70..96f892d 100644 --- a/R/R6_Model.R +++ b/R/R6_Model.R @@ -6,6 +6,13 @@ #' Parameters for cross-validation are either provided or estimated. #' Model family can be chosen among "rf", "tree", "ppr" and "knn" for now. #' +#' @importFrom FNN knn.reg +#' @importFrom class knn +#' @importFrom stats ppr +#' @importFrom randomForest randomForest +#' @importFrom rpart rpart +#' @importFrom caret var_seq +#' #' @export Model <- R6::R6Class("Model", public = list( @@ -18,8 +25,8 @@ Model <- R6::R6Class("Model", #' @param gmodel Generic model returning a predictive function; chosen #' automatically given data and target nature if not provided. #' @param params List of parameters for cross-validation (each defining a model) - initialize = function(data, target, task, gmodel = NA, params = NA) { - if (is.na(gmodel)) { + initialize = function(data, target, task, gmodel = NULL, params = NULL) { + if (is.null(gmodel)) { # (Generic) model not provided all_numeric <- is.numeric(as.matrix(data)) if (!all_numeric) @@ -30,7 +37,7 @@ Model <- R6::R6Class("Model", # Numerical data gmodel = ifelse(task == "regression", "ppr", "knn") } - if (is.na(params)) + if (is.null(params)) # Here, gmodel is a string (= its family), # because a custom model must be given with its parameters. params <- as.list(private$getParams(gmodel, data, target)) @@ -42,18 +49,23 @@ Model <- R6::R6Class("Model", }, #' @description #' Returns the model at index "index", trained on dataHO/targetHO. - #' index is between 1 and self$nmodels. #' @param dataHO Matrix or data.frame #' @param targetHO Vector of targets (generally numeric or factor) #' @param index Index of the model in 1...nmodels get = function(dataHO, targetHO, index) { private$gmodel(dataHO, targetHO, private$params[[index]]) + }, + #' @description + #' Returns the parameter at index "index". + #' @param index Index of the model in 1...nmodels + getParam = function(index) { + private$params[[index]] } ), private = list( # No need to expose model or parameters list - gmodel = NA, - params = NA, + gmodel = NULL, + params = NULL, # Main function: given a family, return a generic model, which in turn # will output a predictive model from data + target + params. getGmodel = function(family, task) { @@ -61,9 +73,15 @@ Model <- R6::R6Class("Model", function(dataHO, targetHO, param) { require(rpart) method <- ifelse(task == "classification", "class", "anova") + if (is.null(colnames(dataHO))) + colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) df <- data.frame(cbind(dataHO, target=targetHO)) - model <- rpart(target ~ ., df, method=method, control=list(cp=param)) - function(X) predict(model, X) + model <- rpart::rpart(target ~ ., df, method=method, control=list(cp=param)) + function(X) { + if (is.null(colnames(X))) + colnames(X) <- paste0("V", 1:ncol(X)) + predict(model, as.data.frame(X)) + } } } else if (family == "rf") { @@ -82,9 +100,17 @@ Model <- R6::R6Class("Model", } } else if (family == "knn") { - function(dataHO, targetHO, param) { - require(class) - function(X) class::knn(dataHO, X, cl=targetHO, k=param) + if (task == "classification") { + function(dataHO, targetHO, param) { + require(class) + function(X) class::knn(dataHO, X, cl=targetHO, k=param) + } + } + else { + function(dataHO, targetHO, param) { + require(FNN) + function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred + } } } }, @@ -95,18 +121,17 @@ Model <- R6::R6Class("Model", require(rpart) df <- data.frame(cbind(data, target=target)) ctrl <- list( + cp = 0, minsplit = 2, minbucket = 1, - maxcompete = 0, - maxsurrogate = 0, - usesurrogate = 0, - xval = 0, - surrogatestyle = 0, - maxdepth = 30) + xval = 0) r <- rpart(target ~ ., df, method="class", control=ctrl) cps <- r$cptable[-1,1] - if (length(cps) <= 11) + if (length(cps) <= 11) { + if (length(cps == 0)) + stop("No cross-validation possible: select another model") return (cps) + } step <- (length(cps) - 1) / 10 cps[unique(round(seq(1, length(cps), step)))] }