Error message while tuning the threshold for classification learners using stacking with the mlr3 package

129 Views Asked by At

I've coded a graph that performs stacking using the mlr3 package. Here is the graph: (image not included)

The objective is for the graph to return weighted average predictions via the learner classif.avg. I am currently trying to make the graph more complex by tuning the threshold of predictions. I followed this example using po("threshold"). However, I encountered this error message:

Error in .__LearnerClassifAvg__prepare_data(self = self, private = private, : Assertion on 'all(fcts) || !any(fcts)' failed: Must be TRUE. This happened in PipeOp classif.avg's $train()

So, I have two questions:

1) Why am I getting this error message?

2) I managed to retrieve the average predictions for a new data frame (via $predict_newdata) using the level 1 learner, but is it possible to retrieve predictions for a new data frame from each level 0 learner?

Any help would be greatly appreciated.

I have reproduced the error message with data("bradypus"). Here's the code:

## Packages
## maxnet supplies the "bradypus" example data set.
library(maxnet)
## mlr3pipelines must be attached: `%>>%` and po() are called unqualified below.
library(mlr3pipelines)
## paradox provides to_tune() and p_dbl(), which are also called unqualified below.
library(paradox)
## The learner keys "classif.kknn", "classif.svm" and "classif.xgboost" come from
## mlr3learners, and the tuner key "mbo" comes from mlr3mbo; both packages have to
## be loaded before those keys can be looked up.
library(mlr3learners)
library(mlr3mbo)

## Task
data("bradypus")
## Drop the column "ecoreg" and convert the target column to a factor
bradypus_data <- bradypus[, colnames(bradypus) != "ecoreg"]
bradypus_data$presence <- as.factor(bradypus_data$presence)
## "target" must be the column name given as a string: a bare symbol would be
## evaluated as a variable and fail with "object 'presence' not found"
task <- mlr3::as_task_classif(x = bradypus_data, target = "presence", positive = "1")

## Split the task into training and testing sets
partitioned_task <- mlr3::partition(task)

## Arguments to run the function "auto_tuner"
## Define the learners for each machine learning model
## (every learner predicts probabilities, which the measure "classif.auc" needs)
lrn_rpart <- mlr3::lrn("classif.rpart", predict_type = "prob")
lrn_kknn <- mlr3::lrn("classif.kknn", predict_type = "prob")
lrn_svm <- mlr3::lrn("classif.svm", predict_type = "prob", type = "C-classification")
lrn_xgboost <- mlr3::lrn("classif.xgboost", predict_type = "prob")

## Define a learner "classif.avg"
lrn_avg <- mlr3pipelines::LearnerClassifAvg$new(id = "classif.avg")
lrn_avg$predict_type <- "prob"
lrn_avg$param_set$values$measure <- "classif.auc"

## Define the other arguments
## tnr() is exported by mlr3tuning, so `::` is enough (`:::` is meant for
## non-exported internals)
tuner <- mlr3tuning::tnr("mbo")
measure <- mlr3::msr("classif.auc")
terminator <- mlr3tuning::trm("evals", n_evals = 5)
resampling <- mlr3::rsmp("cv", folds = 2)

## Define the pipelines based on the level 0 learners.
## For one base learner: apply its default tuning space (lts), chain it with a
## "threshold" step, wrap the result in "learner_cv", and mark the hyperparameter
## "threshold.thresholds" of the wrapped learner for tuning on [0, 1].
## NOTE(review): the reported "all(fcts) || !any(fcts)" failure in classif.avg
## probably originates here. A graph learner that ends in po("threshold") appears
## to report predict_type "response" while its predictions still carry
## probabilities, so "learner_cv" would emit a factor response column next to the
## numeric probability columns -- a mix that classif.avg rejects. TODO: confirm
## against the installed mlr3pipelines version; applying the threshold after
## classif.avg instead of inside each level 0 learner may avoid it.
make_level0_po <- function(base_learner) {
  thresholded <- mlr3tuningspaces::lts(base_learner) %>>% po("threshold")
  cv_po <- mlr3pipelines::po("learner_cv", learner = thresholded)
  cv_po$learner$param_set$set_values(
    threshold.thresholds = to_tune(p_dbl(lower = 0, upper = 1))
  )
  cv_po
}

po_lrn_rpart_cv <- make_level0_po(lrn_rpart)
po_lrn_kknn_cv <- make_level0_po(lrn_kknn)
po_lrn_svm_cv <- make_level0_po(lrn_svm)
po_lrn_xgboost_cv <- make_level0_po(lrn_xgboost)

## Level 0: place the four "learner_cv" pipelines side by side, then join their
## outputs with "featureunion"
level_0_branches <- mlr3pipelines::gunion(
  list(po_lrn_rpart_cv, po_lrn_kknn_cv, po_lrn_svm_cv, po_lrn_xgboost_cv)
)
graph_0 <- level_0_branches %>>% mlr3pipelines::po("featureunion")

## Level 1: append the averaging learner "lrn_avg" and wrap the whole graph as a
## learner
graph_1 <- graph_0 %>>% lrn_avg
## graph_1$plot()
lrn_graph_1 <- mlr3::as_learner(graph_1)

## Run the function "auto_tuner"
run_auto_tuner <- mlr3tuning::auto_tuner(
  tuner = tuner,
  learner = lrn_graph_1,
  resampling = resampling,
  measure = measure,
  terminator = terminator
)

## Fit on the training rows only, so the test rows of the partition stay unseen
run_auto_tuner$train(task, row_ids = partitioned_task$train)

## Retrieve the predictions for the held-out rows.
## predict_newdata() expects tabular data (e.g. a data.frame or data.table), not
## a Task, so the test rows are extracted from the task first
test_data <- task$data(rows = partitioned_task$test)
predictions <- run_auto_tuner$predict_newdata(newdata = test_data)
## predictions$prob
  ## predictions$prob
0

There are 0 best solutions below