defaultLoss_classif <- function(y1, y2) { if (is.null(dim(y1))) # Standard case: "hard" classification mean(y1 != y2) else { # "Soft" classification: predict() outputs a probability matrix # In this case "target" could be in matrix form. if (!is.null(dim(y2))) mean(rowSums(abs(y1 - y2))) else { # Or not: y2 is a "factor". y2 <- as.character(y2) # NOTE: the user should provide target in matrix form because # matching y2 with columns is rather inefficient! names <- colnames(y1) positions <- list() for (idx in seq_along(names)) positions[[ names[idx] ]] <- idx mean(vapply( seq_along(y2), function(idx) sum(abs(y1[idx,] - positions[[ y2[idx] ]])), 0)) } } } defaultLoss_regress <- function(y1, y2) { mean(abs(y1 - y2)) } # TODO: allow strings like "MSE", "abs" etc checkLoss <- function(loss, task) { if (!is.null(loss) && !is.function(loss)) stop("loss: function(y1, y2) --> Real") if (is.null(loss)) { loss <- if (task == "classification") { defaultLoss_classif } else { defaultLoss_regress } } loss } checkCV <- function(CV) { if (is.null(CV)) CV <- list(type="MC", V=10, test_size=0.2, shuffle=TRUE) else { if (!is.list(CV)) stop("CV: list of type('MC'|'vfold'), V(integer, [test_size, shuffle]") if (is.null(CV$type)) { warning("CV$type not provided: set to MC") CV$type <- "MC" } if (is.null(CV$V)) { warning("CV$V not provided: set to 10") CV$V <- 10 } if (CV$type == "MC" && is.null(CV$test_size)) CV$test_size <- 0.2 if (CV$type == "vfold" && is.null(CV$shuffle)) CV$shuffle <- TRUE } CV } checkDaTa <- function(data, target) { if (!is.data.frame(data) && !is.matrix(data)) stop("data: data.frame or matrix") if (is.data.frame(target) || is.matrix(target)) { if (!is.numeric(target)) stop("multi-columns target must be a probability matrix") if (nrow(target) != nrow(data) || ncol(target) == 1) stop("target probability matrix does not match data size") } else if (!is.numeric(target) && !is.factor(target) && !is.character(target)) stop("target: numeric, factor or character vector") } checkTask <- function(task, target) { if (!is.null(task)) task <- match.arg(task, c("classification", "regression")) task <- ifelse(is.numeric(target), "regression", "classification") } checkModPar <- function(gmodel, params) { if (is.character(gmodel)) gmodel <- match.arg(gmodel, c("knn", "ppr", "rf", "tree")) else if (!is.null(gmodel) && !is.function(gmodel)) stop("gmodel: function(dataHO, targetHO, param) --> function(X) --> y") if (is.numeric(params) || is.character(params)) params <- as.list(params) if (!is.list(params) && !is.null(params)) stop("params: numerical, character, or list (passed to model)") if (is.function(gmodel) && !is.list(params)) stop("params must be provided when using a custom model") if (is.list(params) && is.null(gmodel)) stop("model (or family) must be provided when using custom params") list(gmodel=gmodel, params=params) }