Training LGBM on a balanced dataset gives the following error

202 Views Asked by At

I have balanced the dataset using SMOTE as follows:

# Resample X_train / y_train with SMOTE, using a fixed seed so the
# synthetic samples are reproducible.
# NOTE(review): presumably `over_sampling` is imblearn.over_sampling — confirm.
sm = over_sampling.SMOTE(random_state=1000)
X_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)

Then I split the dataset as follows:

# Hold out 30% of the SMOTE-resampled data as a test set (fixed seed).
# NOTE(review): the split is done on the already-oversampled data, so X_test
# contains synthetic rows derived from the same originals as the training
# rows; scores on it will likely be optimistic. Consider splitting first and
# resampling only the training part.
X_train, X_test, y_train, y_test = train_test_split(X_train_smote, y_train_smote, test_size=0.3, random_state=220)

But when I try to train LightGBM as follows:

# Base estimator handed to the randomized search; the search sets the
# hyper-parameters listed in param_dist on each candidate.
model = LGBMClassifier(n_estimators=100, random_state=22)

# NOTE(review): kfolds is never passed to the search below (it is given
# cv= 5 instead), so this stratified 3-fold splitter is currently unused.
kfolds = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 100)

#Range of Parameters
# Each value should be either a distribution object (something with .rvs)
# or a list of candidate values for RandomizedSearchCV to sample from.
# NOTE(review): assumes `stats` is scipy.stats — confirm.
# NOTE(review): for scipy's randint the third positional argument is `loc`,
# so 'n_estimators' is drawn from [4100, 4500), not [100, 500) — confirm
# this is intended.
param_dist = {'n_estimators': stats.randint(100,500,4000),
          # uniform(loc, scale) covers [loc, loc + scale] -> [0.01, 0.04]
          'learning_rate': stats.uniform(0.01,0.03),
          # NOTE(review): this covers [0.3, 1.2]; a subsample fraction above
          # 1.0 is presumably invalid for LightGBM — uniform(0.3, 0.7) would
          # cover [0.3, 1.0]. Confirm.
          'subsample': stats.uniform(0.3, 0.9),
          'max_depth': [3, 4, 5, 6, 7,20],
          # covers [0.3, 0.8]
          'colsample_bytree': stats.uniform(0.3, 0.5),
          'min_child_weight': [2, 3, 4,10],
          'num_leaves':[10,15,26],
          'reg_alpha':[2,4,6],
          'reg_lambda':[10,20],
          'min_split_gain':[0.0001,0.002,0.007],
          # NOTE(review): likely cause of the TypeError reported below. This
          # value is a bare dict rather than a list of candidates, so the
          # sampler picks an element out of it by index and the estimator
          # receives class_weight=1 or class_weight=1.0122 (a scalar), on
          # which len(class_weight) fails. Wrapping the mapping in a list,
          # i.e. [{0:1, 1:1.0122}], would pass the dict through intact.
          'class_weight':{0:1,1:1.0122}
         }

# Randomized search: 10 sampled candidates, each scored with 5-fold CV.
LGB = RandomizedSearchCV(estimator=model, param_distributions=param_dist, cv= 5,n_iter=10, verbose=1)


# Run the search; verbose=1 is forwarded as a fit parameter to the estimator.
LGB.fit(X_train,y_train,verbose=1)

it gives an error, which can be traced in the following lines:

    Fitting 5 folds for each of 10 candidates, totalling 50 fits
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-55-30ee1c88db6c> in <module>

Fitting the model on train data

    ---> 35 LGB.fit(X_train,y_train,verbose=1)
     36 
     37 #The best hyper parameters set

    C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in 
    inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

    C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py 
    in fit(self, X, y, groups, **fit_params)
    878             refit_start_time = time.time()
    879             if y is not None:
    --> 880                 self.best_estimator_.fit(X, y, **fit_params)
    881             else:
    882                 self.best_estimator_.fit(X, **fit_params)

    C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in fit(self, X, y, sample_weight, init_score, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
    893                     eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
    894                     verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature,
    --> 895                     callbacks=callbacks, init_model=init_model)
    896         return self
    897 

    C:\ProgramData\Anaconda3\lib\site-packages\lightgbm\sklearn.py in fit(self, X, y, sample_weight, init_score, group, eval_set, eval_names, eval_sample_weight, eval_class_weight, eval_init_score, eval_group, eval_metric, early_stopping_rounds, verbose, feature_name, categorical_feature, callbacks, init_model)
    623             self._class_weight = self.class_weight
    624         if self._class_weight is not None:
    --> 625             class_sample_weight = _LGBMComputeSampleWeight(self._class_weight, y)
    626             if sample_weight is None or len(sample_weight) == 0:
    627                 sample_weight = class_sample_weight

    C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

     C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\class_weight.py in compute_sample_weight(class_weight, y, indices)
    165             weight_k = compute_class_weight(class_weight_k,
    166                                             classes=classes_full,
    --> 167                                             y=y_full)
     168 
    169         weight_k = weight_k[np.searchsorted(classes_full, y_full)]

    C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

    C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\class_weight.py in compute_class_weight(class_weight, classes, y)
     44         raise ValueError("classes should include all valid labels that can "
     45                          "be in y")
    ---> 46     if class_weight is None or len(class_weight) == 0:
     47         # uniform class weights
     48         weight = np.ones(classes.shape[0], dtype=np.float64, order='C')

   TypeError: object of type 'float' has no len()

Could anyone help me fix this, or suggest another way of training LGBM?

Thank you

0

There are 0 best solutions below