Update package to send on CRAN
[agghoo.git] / R / R6_Model.R
CommitLineData
c5946158
BA
#' @title R6 class representing a (generic) model.
#'
#' @description
#' "Model" class, containing a (generic) learning function, which from
#' data + target [+ params] returns a prediction function X --> y.
#' Parameters for cross-validation are either provided or estimated.
#' Model family can be chosen among "tree", "ppr" and "knn" for now.
#'
#' @importFrom FNN knn.reg
#' @importFrom class knn
#' @importFrom stats ppr
#' @importFrom rpart rpart
#'
#' @export
Model <- R6::R6Class("Model",
  public = list(
    #' @field nmodels Number of parameters (= number of [predictive] models)
    nmodels = NA,

    #' @description Create a new generic model.
    #' @param data Matrix or data.frame
    #' @param target Vector of targets (generally numeric or factor)
    #' @param task "regression" or "classification"
    #' @param gmodel Generic model returning a predictive function; chosen
    #'               automatically given data and target nature if not provided.
    #'               Either a family name ("tree", "ppr", "knn") or a function
    #'               (dataHO, targetHO, param) --> prediction function.
    #' @param params List of parameters for cross-validation (each defining
    #'               a model). Estimated from the family when NULL.
    initialize = function(data, target, task, gmodel = NULL, params = NULL) {
      if (is.null(gmodel)) {
        # (Generic) model not provided: choose a family from the data nature.
        if (!is.numeric(as.matrix(data))) {
          # At least one non-numeric column: use trees
          gmodel <- "tree"
        } else if (task == "regression") {
          # Numerical data, regression
          gmodel <- "ppr"
        } else {
          # Numerical data, classification
          gmodel <- "knn"
        }
      }
      if (is.null(params)) {
        # Here, gmodel is a string (= its family),
        # because a custom model must be given with its parameters.
        params <- as.list(private$getParams(gmodel, data, target, task))
      }
      private$params <- params
      if (is.character(gmodel)) {
        gmodel <- private$getGmodel(gmodel, task)
      }
      private$gmodel <- gmodel
      self$nmodels <- length(private$params)
    },

    #' @description
    #' Returns the model at index "index", trained on dataHO/targetHO.
    #' @param dataHO Matrix or data.frame
    #' @param targetHO Vector of targets (generally numeric or factor)
    #' @param index Index of the model in 1...nmodels
    get = function(dataHO, targetHO, index) {
      private$gmodel(dataHO, targetHO, private$params[[index]])
    },

    #' @description
    #' Returns the parameter at index "index".
    #' @param index Index of the model in 1...nmodels
    getParam = function(index) {
      private$params[[index]]
    }
  ),
  private = list(
    # No need to expose model or parameters list
    gmodel = NULL,
    params = NULL,

    # Main function: given a family, return a generic model, which in turn
    # will output a predictive model from data + target + params.
    # Stops on an unknown family instead of silently returning NULL.
    getGmodel = function(family, task) {
      if (family == "tree") {
        function(dataHO, targetHO, param) {
          method <- if (task == "classification") "class" else "anova"
          # The formula interface needs column names.
          if (is.null(colnames(dataHO))) {
            colnames(dataHO) <- paste0("V", seq_len(ncol(dataHO)))
          }
          df <- data.frame(cbind(dataHO, target = targetHO))
          model <- rpart::rpart(target ~ ., df, method = method,
                                control = list(cp = param))
          # Prediction type: values for regression; for classification,
          # labels when the target has no dim attribute, else probabilities.
          if (task == "regression") {
            type <- "vector"
          } else if (is.null(dim(targetHO))) {
            type <- "class"
          } else {
            type <- "prob"
          }
          function(X) {
            # Same default naming as at training time.
            if (is.null(colnames(X))) {
              colnames(X) <- paste0("V", seq_len(ncol(X)))
            }
            stats::predict(model, as.data.frame(X), type = type)
          }
        }
      } else if (family == "ppr") {
        function(dataHO, targetHO, param) {
          model <- stats::ppr(dataHO, targetHO, nterms = param)
          function(X) stats::predict(model, X)
        }
      } else if (family == "knn") {
        if (task == "classification") {
          function(dataHO, targetHO, param) {
            function(X) class::knn(dataHO, X, cl = targetHO, k = param)
          }
        } else {
          function(dataHO, targetHO, param) {
            function(X) FNN::knn.reg(dataHO, X, y = targetHO, k = param)$pred
          }
        }
      } else {
        stop("Unknown model family: ", family, call. = FALSE)
      }
    },

    # Return a default list of parameters, given a gmodel family
    getParams = function(family, data, target, task) {
      if (family == "tree") {
        # Run rpart once to obtain a CV grid for parameter cp:
        # grow the tree with cp = 0 and the smallest split/bucket sizes,
        # without rpart's internal cross-validation (xval = 0).
        df <- data.frame(cbind(data, target = target))
        ctrl <- list(
          cp = 0,
          minsplit = 2,
          minbucket = 1,
          xval = 0)
        method <- if (task == "classification") "class" else "anova"
        r <- rpart::rpart(target ~ ., df, method = method, control = ctrl)
        # First column of cptable, first row dropped.
        cps <- r$cptable[-1, 1]
        if (length(cps) <= 1) {
          stop("No cross-validation possible: select another model")
        }
        if (length(cps) <= 11) {
          return(cps)
        }
        # Keep ~11 values regularly spread over the cp sequence.
        step <- (length(cps) - 1) / 10
        cps[unique(round(seq(1, length(cps), step)))]
      } else if (family == "ppr") {
        # This is nterms in ppr() function
        1:10
      } else if (family == "knn") {
        n <- nrow(data)
        # Choose ~10 NN values
        K <- length(unique(target))
        if (n <= 10) {
          # seq_len() avoids the descending 1:0 sequence when n == 1.
          return(seq_len(n - 1))
        }
        sqrt_n <- sqrt(n)
        step <- (2 * sqrt_n - 1) / 10
        # Rounding may collapse neighbours, so the grid can hold fewer
        # than 11 values: never index it with a fixed range.
        grid <- unique(round(seq(1, 2 * sqrt_n, step)))
        if (K == 2) {
          # Common binary classification case: odd number of neighbors.
          # Bumping even values by one is an arbitrary choice; it can create
          # duplicates (2 -> 3 next to an existing 3), hence unique().
          even <- grid %% 2 == 0
          grid[even] <- grid[even] + 1
          grid <- unique(grid)
        }
        grid
      }
    }
  )
)