X-Git-Url: https://git.auder.net/?a=blobdiff_plain;f=R%2FR6_Model.R;h=171966676cd17cb942706176c089963c4197da77;hb=HEAD;hp=05cb7d8dd4bd52cb261a4110b56103799a83c1f8;hpb=17ea2f13e0c32c107db20677750bd7a98bb7e0f8;p=agghoo.git diff --git a/R/R6_Model.R b/R/R6_Model.R index 05cb7d8..d48825e 100644 --- a/R/R6_Model.R +++ b/R/R6_Model.R @@ -4,14 +4,12 @@ #' "Model" class, containing a (generic) learning function, which from #' data + target [+ params] returns a prediction function X --> y. #' Parameters for cross-validation are either provided or estimated. -#' Model family can be chosen among "rf", "tree", "ppr" and "knn" for now. +#' Model family can be chosen among "tree", "ppr" and "knn" for now. #' #' @importFrom FNN knn.reg #' @importFrom class knn #' @importFrom stats ppr -#' @importFrom randomForest randomForest #' @importFrom rpart rpart -#' @importFrom caret var_seq #' #' @export Model <- R6::R6Class("Model", @@ -30,9 +28,8 @@ Model <- R6::R6Class("Model", # (Generic) model not provided all_numeric <- is.numeric(as.matrix(data)) if (!all_numeric) - # At least one non-numeric column: use random forests or trees - # TODO: 4 = arbitrary magic number... - gmodel = ifelse(ncol(data) >= 4, "rf", "tree") + # At least one non-numeric column: use trees + gmodel = "tree" else # Numerical data gmodel = ifelse(task == "regression", "ppr", "knn") @@ -71,7 +68,7 @@ Model <- R6::R6Class("Model", getGmodel = function(family, task) { if (family == "tree") { function(dataHO, targetHO, param) { - require(rpart) + base::require(rpart) method <- ifelse(task == "classification", "class", "anova") if (is.null(colnames(dataHO))) colnames(dataHO) <- paste0("V", 1:ncol(dataHO)) @@ -92,15 +89,6 @@ Model <- R6::R6Class("Model", } } } - else if (family == "rf") { - function(dataHO, targetHO, param) { - require(randomForest) - if (task == "classification" && !is.factor(targetHO)) - targetHO <- as.factor(targetHO) - model <- randomForest::randomForest(dataHO, targetHO, mtry=param) - function(X) predict(model, X) - } - } else if (family == "ppr") { function(dataHO, targetHO, param) { model <- stats::ppr(dataHO, targetHO, nterms=param) @@ -110,13 +98,13 @@ Model <- R6::R6Class("Model", else if (family == "knn") { if (task == "classification") { function(dataHO, targetHO, param) { - require(class) + base::require(class) function(X) class::knn(dataHO, X, cl=targetHO, k=param) } } else { function(dataHO, targetHO, param) { - require(FNN) + base::require(FNN) function(X) FNN::knn.reg(dataHO, X, y=targetHO, k=param)$pred } } @@ -126,7 +114,7 @@ Model <- R6::R6Class("Model", getParams = function(family, data, target, task) { if (family == "tree") { # Run rpart once to obtain a CV grid for parameter cp - require(rpart) + base::require(rpart) df <- data.frame(cbind(data, target=target)) ctrl <- list( cp = 0, @@ -143,13 +131,6 @@ Model <- R6::R6Class("Model", step <- (length(cps) - 1) / 10 cps[unique(round(seq(1, length(cps), step)))] } - else if (family == "rf") { - p <- ncol(data) - # Use caret package to obtain the CV grid of mtry values - require(caret) - caret::var_seq(p, classification = (task == "classification"), - len = min(10, p-1)) - } else if (family == "ppr") # This is nterms in ppr() function 1:10