The "undefined columns selected" error always occurs when I try to tune hyperparameters of the Cubist learner with mlr3

29 Views Asked by At

This is the code I used when I tried to tune the hyperparameters of the regr.cubist learner in mlr3. I first saw the error with my own data; I then took the example data set from the Cubist website, and the error occurred again.

My code is pasted here:

# Reproducible example: tune the rules/committees hyperparameters of a Cubist
# regression learner with mlr3 on the Ames housing data.

# Packages ----
# modeldata ships the `ames` data set; mlr3 and mlr3tuning provide the task,
# learner, resampling and tuning helpers used below; mlr3extralearners
# provides the "regr.cubist" learner.
library(modeldata)
library(mlr3)
library(mlr3tuning)
library(mlr3extralearners)

# Load data ----
data(ames, package = "modeldata")

# Log-transform the target exactly once. The original snippet repeated this
# line after defining `predictors`, which applied log10() twice.
ames$Sale_Price <- log10(ames$Sale_Price)

predictors <- c(
  "Lot_Area", "Alley", "Lot_Shape", "Neighborhood", "Bldg_Type",
  "Year_Built", "Total_Bsmt_SF", "Central_Air", "Gr_Liv_Area",
  "Bsmt_Full_Bath", "Bsmt_Half_Bath", "Full_Bath", "Half_Bath",
  "TotRms_AbvGrd", "Year_Sold", "Longitude", "Latitude"
)

# Keep only the target and the selected predictors.
ames <- ames[, colnames(ames) %in% c("Sale_Price", predictors)]

# Set up task ----
task <- as_task_regr(ames, target = "Sale_Price")

measure <- msr("regr.rsq")

# 80/20 split of the row ids. floor() makes the sample size an explicit
# integer. Note: these sets are not passed to the tuning instance below,
# which resamples the full task with 4-fold cross-validation.
train_set <- sample(task$row_ids, floor(0.80 * task$nrow))
test_set <- setdiff(task$row_ids, train_set)

# Set up tuner ----
# `rules` and `committees` are tuned; `neighbors` and `extrapolation` are
# held fixed.
learner_cub <- lrn(
  "regr.cubist",
  rules = to_tune(1, 5),
  committees = to_tune(3, 6),
  neighbors = 0,
  extrapolation = 90
)

instance <- ti(
  task = task,
  learner = learner_cub,
  resampling = rsmp("cv", folds = 4),
  measures = measure,
  terminator = trm("run_time", secs = 600)
)

# Grid with 2 values per tuned parameter, evaluated 2 configurations at a time.
tuner <- tnr("grid_search", resolution = 2, batch_size = 2)

tuner$optimize(instance)

And I saw this error message:

Error in `[.data.frame`(x, i, j, drop) : undefined columns selected
30.
stop(condition)
29.
signalConditions(obj, exclude = getOption("future.relay.immediate",
"immediateCondition"), resignal = resignal, ...)
28.
signalConditionsASAP(obj, resignal = FALSE, pos = ii)
27.
resolve.list(y, result = TRUE, stdout = stdout, signal = signal,
force = TRUE)
26.
resolve(y, result = TRUE, stdout = stdout, signal = signal, force = TRUE)
25.
value.list(fs)
24.
value(fs)
23.
future_xapply(FUN = FUN, nX = nX, chunk_args = dots, MoreArgs = MoreArgs,
get_chunk = function(X, chunk) lapply(X, FUN = `chunkWith[[`,
chunk), expr = expr, envir = envir, future.envir = future.envir,
future.globals = future.globals, future.packages = future.packages, ...
22.
future.apply::future_mapply(FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE,
USE.NAMES = FALSE, future.globals = FALSE, future.packages = "mlr3",
future.seed = TRUE, future.scheduling = scheduling, future.chunk.size = chunk_size,
future.stdout = stdout)
21.
future_map(n, workhorse, task = grid$task, learner = grid$learner,
resampling = grid$resampling, iteration = grid$iteration,
param_values = grid$param_values, mode = grid$mode, MoreArgs = list(store_models = store_models,
lgr_threshold = lgr_threshold, pb = pb))
20.
benchmark(design = private$.design, store_models = self$store_models ||
self$allow_hotstart, allow_hotstart = self$allow_hotstart,
clone = character(0))
19.
.__ObjectiveTuning__.eval_many(self = self, private = private,
super = super, xss = xss, resampling = resampling)
18.
private$.eval_many(xss, resampling = list(<environment>))
17.
eval(expr, p)
16.
eval(expr, p)
15.
eval.parent(expr, n = 1L)
14.
invoke(private$.eval_many, xss, .args = self$constants$values)
13.
.__Objective__eval_many(self = self, private = private, super = super,
xss = xss)
12.
self$objective$eval_many(xss_trafoed)
11.
.__OptimInstance__eval_batch(self = self, private = private,
super = super, xdt = xdt)
10.
inst$eval_batch(data[inds])
9.
.__TunerGridSearch__.optimize(self = self, private = private,
super = super, inst = inst)
8.
private$.optimize(inst)
7.
doTryCatch(return(expr), name, parentenv, handler)
6.
tryCatchOne(expr, names, parentenv, handlers[[1L]])
5.
tryCatchList(expr, classes, parentenv, handlers)
4.
tryCatch({
private$.optimize(inst)
}, terminated_error = function(cond) {
})
3.
optimize_default(inst, self, private)
2.
.__Tuner__optimize(self = self, private = private, super = super,
inst = inst)
1.
tuner$optimize(instance)

The error occurred repeatedly, both with my own data and with the example data from the Cubist website.

0

There are 0 best solutions below