How to solve this issues "AssertionError: Must have at least 1 validation dataset for early stopping"

188 Views Asked by At

I am developing a propensity scorecard model in xgboost in 1.6.2 version and while running below code it gives me errors. I searched lots of relevant question on stack overflow but wasn't able to find best solution. how to change it in my code?

from sklearn.model_selection import  GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier

model=XGBClassifier()
from skopt import BayesSearchCV

bayes_cv_tuner = BayesSearchCV(
    estimator = XGBClassifier(
        n_jobs = 8,
        objective = 'binary:logistic',
        eval_metric = 'auc',
        silent=1,
        tree_method='approx', early_stopping_rounds = 10
    ),
    search_spaces = {
        'learning_rate': (0.001, 0.005),
        'min_child_weight': (150, 500),
        'max_depth': (2, 4),
        'max_delta_step': (1, 10),
        'subsample': (0.5, .9),
        'colsample_bytree': (0.6, 0.8),
        'colsample_bylevel': (0.6, 0.9),
        'reg_lambda':(100,500),
        'reg_alpha': (0.005,.05),
        'gamma':  (2.5,10),
        'n_estimators': (25, 100),
        'scale_pos_weight': (1, 1000)
    },    
    scoring = 'roc_auc',
    cv = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 8,
    n_iter = 80,   
    refit = True,
    random_state = 42
)

def status_print(optim_result):
    """Status callback during bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest ROC-AUC: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

result = bayes_cv_tuner.fit(data_train.iloc[:,1:-1],data_train.iloc[:,-1], callback=status_print)
0

There are 0 best solutions below