I have balanced the dataset using SMOTE like following:
sm = over_sampling.SMOTE(random_state=1000)
X_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
Then splat like the data set like following:
X_train, X_test, y_train, y_test = train_test_split(X_train_smote, y_train_smote, test_size=0.3, random_state=220)
but when I want to train lightGBM like following:
model = LGBMClassifier(n_estimators=100, random_state=22)
kfolds = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 100)
#Range of Parameters
param_dist = {'n_estimators': stats.randint(100,500,4000),
'learning_rate': stats.uniform(0.01,0.03),
'subsample': stats.uniform(0.3, 0.9),
'max_depth': [3, 4, 5, 6, 7,20],
'colsample_bytree': stats.uniform(0.3, 0.5),
'min_child_weight': [2, 3, 4,10],
'num_leaves':[10,15,26],
'reg_alpha':[2,4,6],
'reg_lambda':[10,20],
'min_split_gain':[0.0001,0.002,0.007],
'class_weight':{0:1,1:1.0122}
}
LGB = RandomizedSearchCV(estimator=model, param_distributions=param_dist, cv= 5,n_iter=10, verbose=1)
LGB.fit(X_train,y_train,verbose=1)
It gives error which can be traced in the following lines:
Fitting 5 folds for each of 10 candidates, totalling 50 fits
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-55-30ee1c88db6c> in <module>
Fitting the model on train data
---> 35 LGB.fit(X_train,y_train,verbose=1)
36
37 #The best hyper parameters set
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in
inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py
in fit(self, X, y, groups, **fit_params)
878 refit_start_time = time.time()
879 if y is not None:
--> 880 self.best_estimator_.fit(X, y, **fit_params)
881 else:
882 self.best_estimator_.fit(X, **fit_params)
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in fit(self, X, y, sample_weight, init_score, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
893 eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
894 verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature,
--> 895 callbacks=callbacks, init_model=init_model)
896 return self
897
C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in fit(self, X, y, sample_weight, init_score, group, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_group, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
623 self._class_weight = self.class_weight
624 if self._class_weight is not None:
--> 625 class_sample_weight = _LGBMComputeSampleWeight(self._class_weight, y)
626 if sample_weight is None or len(sample_weight) == 0:
627 sample_weight = class_sample_weight
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\class_weight.py in compute_sample_weight(class_weight, y, indices)
165 weight_k = compute_class_weight(class_weight_k,
166 classes=classes_full,
--> 167 y=y_full)
168
169 weight_k = weight_k[np.searchsorted(classes_full, y_full)]
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\class_weight.py in compute_class_weight(class_weight, classes, y)
44 raise ValueError("classes should include all valid labels that can "
45 "be in y")
---> 46 if class_weight is None or len(class_weight) == 0:
47 # uniform class weights
48 weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
TypeError: object of type 'float' has no len()
Any help please how to fix this or any other method of training LGBM
Thank you