I have the following lines of code.
library(mlr3)
library(mlr3pipelines)
library(mlr3extralearners)
library(DALEX)
library(DALEXtra)
library(tidyverse)
# Load the German credit data and keep the target plus three features.
data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "purpose", "age")]

# Classification task that predicts `credit_risk` from the reduced table.
task <- TaskClassif$new(
  id = "german_credit",
  backend = data,
  target = "credit_risk"
)

# Preprocessing steps chained in front of a LightGBM classifier.
g <- po("imputemedian") %>>%
  po("imputeoor") %>>%
  po("fixfactors") %>>%
  po("encodeimpact") %>>%
  lrn("classif.lightgbm")

# Wrap the graph as a single learner and fit it on the task.
gl <- GraphLearner$new(g)
gl$train(task)
Break down the prediction to evaluate the contribution of each variable:
# Build a DALEX explainer around the graph learner `gl`.
# `y` is coded as 1 where credit_risk equals "bad" and 0 otherwise.
lgbm_explain <- explain_mlr3(
  model = gl,
  data = task$data(),
  y = as.numeric(task$data()$credit_risk == "bad"),
  label = "Lightgbm",
  colorize = FALSE
)
# Break down the prediction for the first observation and plot the result.
newdata <- data[1, ]
lgbm_predict_part <- predict_parts(
  explainer = lgbm_explain,
  new_observation = newdata
)
plot(lgbm_predict_part)
To use predict_parts on every observation, I tried looping over the rows with the function below, but it runs very slowly.
#' Compute a prediction breakdown for a single row of a table.
#'
#' @param data Table of observations; row `i` is the one to explain.
#' @param i Row index into `data`.
#' @param explainer Explainer handed to `predict_parts()`. Defaults to the
#'   global `lgbm_explain`, so existing two-argument calls behave as before;
#'   pass it explicitly to avoid relying on global state.
#' @return The value returned by `predict_parts()` for row `i`.
fnc_predict_parts <- function(data, i, explainer = lgbm_explain) {
  newdata <- data %>% slice(i)
  predict_parts(explainer, new_observation = newdata)
}
# Run fnc_predict_parts once per row of `data`, one row after another, and
# row-bind the results; the `id` column records the position in the sequence.
list_pred_parts <- map_dfr(
  seq_len(nrow(data)),
  fnc_predict_parts,
  data = data,
  .id = "id"
)
May I ask how to run predict_parts in parallel? Or is there an algorithm that can be run on the whole dataset at once?