I want to tune a model using a custom class probability metric "pg", which stands for partial gini coefficient. I use it on data that exists of numerical predictors and a binary factor as class label (after preprocessing with the recipe). This is the tuning code:
xgb_folds <- train %>% vfold_cv(v=5)
xgb_model <- parsnip::boost_tree(
mode = "classification",
trees = tune(),
tree_depth = tune(),
learn_rate = tune(),
loss_reduction = tune()
) %>%
set_engine("xgboost")
xgb_wf <- workflow() %>%
add_recipe(TREE_recipe) %>%
add_model(xgb_model)
xgboost_tuned <- tune::tune_grid(
object = xgb_wf,
resamples = xgb_folds,
grid = hyperparameters_XGB_tidy,
metrics = metric_set(pg),
control = tune::control_grid(verbose = TRUE)
The code above worked when I set the metric in tune_grid to roc_auc. When using pg however I get this warning: Warning message: All models failed. Run 'show_notes(.Last.tune.result)' for more information.
The .Last.tune.result contains this error:
unique notes:
────────────────────────────────────────────────────────────────────────────────────────────────────
Error in `metric_set()`:
! Failed to compute `pg()`.
Caused by error in `UseMethod()`:
! no applicable method for 'pg' applied to an object of class "c('grouped_df', 'tbl_df', 'tbl', 'data.frame')"
This is the yardstick implementation for pg I tried: (running pg_vec on class probabilities and label vectors worked as expected)
# partialGini for tidymodels
library(tidymodels, rlang)
pg_impl <- function(truth, estimate, case_weights = NULL) {
sorted_indices <- order(estimate, decreasing = TRUE)
sorted_probs <- estimate[sorted_indices]
sorted_actuals <- truth[sorted_indices]
# Select subset with PD < 0.4
subset_indices <- which(sorted_probs < 0.4)
subset_probs <- sorted_probs[subset_indices]
subset_actuals <- sorted_actuals[subset_indices]
# Check if there are both positive and negative cases in the subset
if (length(unique(subset_actuals)) > 1) {
# Calculate ROC curve for the subset
roc_subset <- pROC::roc(subset_actuals, subset_probs,
direction = "<", quiet = TRUE)
# Calculate AUC for the subset
partial_auc <- pROC::auc(roc_subset)
# Calculate partial Gini coefficient
(2 * partial_auc - 1)
} else return(NA)
}
pg_vec <- function(truth, estimate, estimator = NULL, na_rm = TRUE, case_weights = NULL, ...) {
abort_if_class_pred(truth)
estimator <- finalize_estimator(truth, estimator)
check_prob_metric(truth, estimate, case_weights, estimator)
if (na_rm) {
result <- yardstick_remove_missing(truth, estimate, case_weights)
truth <- result$truth
estimate <- result$estimate
case_weights <- result$case_weights
} else if (yardstick_any_missing(truth, estimate, case_weights)) {
return(NA_real_)
}
pg_impl(truth, estimate, case_weights = case_weights)
}
pg <- function(data, ...) {
UseMethod("pg")
}
pg <- new_prob_metric(pg, direction = "maximize")
pg.data.frame <- function(data, truth, ..., na_rm = TRUE) {
prob_metric_summarizer(
name = "pg",
fn = pg_vec,
data = data,
truth = !! enquo(truth),
...,
na_rm = na_rm)
}