error when using majority vote, pipeline and gridsearch

56 Views Asked by At

I have a data set and I need to perform feature selection on it, with that I will have 4 different models where I need to use the majority vote. Until then it was working but now I need to use gridsearch to check the parameters for my model but I'm having difficulties. I appreciate if someone can help me.

 from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.metrics import classification_report
    from pandas import read_csv
    
    
    from mlxtend.classifier import EnsembleVoteClassifier
    from sklearn.metrics import accuracy_score
    from sklearn2pmml import make_pmml_pipeline
    from sklearn2pmml import sklearn2pmml
    from sklearn.compose import ColumnTransformer, make_column_transformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from sklearn2pmml.pipeline import PMMLPipeline
    from sklearn.ensemble._voting import VotingClassifier
    
    from mlxtend.feature_selection import ColumnSelector
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import cohen_kappa_score
    from sklearn.metrics import f1_score
    from sklearn.metrics import matthews_corrcoef
    from sklearn.metrics.cluster import fowlkes_mallows_score
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import average_precision_score
    from sklearn.model_selection import KFold
    
    from sklearn.metrics import precision_score #precision
    from sklearn.metrics import recall_score #recall
    import numpy as np
    from sklearn.metrics import precision_recall_fscore_support
    from sklearn.ensemble import RandomForestClassifier
    
    print(__doc__)
    
    fold1_train = 'D:/ARTIGO/TREINAMENTO.CSV'
    df_fold1_train = read_csv(fold1_train, header=None)
    data_fold1_train = df_fold1_train.values
    
    fold1_test = 'D:/ARTIGO/TESTE.CSV'
    df_fold1_test = read_csv(fold1_test, header=None)
    data_fold1_test = df_fold1_test.values
    
    X_train_fold1 = data_fold1_train[:, :-1]
    y_train_fold1 = data_fold1_train[:, -1]
    
    X_test_fold1 = data_fold1_test[:, :-1]
    y_test_fold1 = data_fold1_test[:, -1]
    
    
    #features selection
    features1 = [2, 5, 7]
    features2 = [0, 1, 4, 5, 7]
    features3 = [0, 1, 4, 5, 6]
    features4 = [1, 4]
    
    
    numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
    preprocessor1 = ColumnTransformer(transformers=[('numerical', numeric_transformer, features1)])
    preprocessor2 = ColumnTransformer(transformers=[('numerical', numeric_transformer, features2)])
    preprocessor3 = ColumnTransformer(transformers=[('numerical', numeric_transformer, features3)])
    preprocessor4 = ColumnTransformer(transformers=[('numerical', numeric_transformer, features4)])
    
    #RandomForest
    pipeline = PMMLPipeline([
      ('classifier', VotingClassifier([
        ("pipe1", Pipeline(steps=[('preprocessor1', preprocessor1),('classifier1', SVC())])),
        ("pipe2", Pipeline(steps=[('preprocessor2', preprocessor2),('classifier2', SVC())])),
        ("pipe3", Pipeline(steps=[('preprocessor3', preprocessor3),('classifier3', SVC())])),
        ("pipe4", Pipeline(steps=[('preprocessor4', preprocessor4),('classifier4', SVC())]))
      ]))
    ])
    
    # Set the parameters by cross-validation
    tuned_parameters = [{'classifier__kernel': ['rbf', 'linear']
                        # , 'pipe1__gamma': [1e-3, 1e-4]
                        }]
      
    scores = ['precision']
    
    
    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()
    
        
        cv = KFold(n_splits=5)   
        
        clf = GridSearchCV(
            estimator=pipeline, param_grid=tuned_parameters,  n_jobs=-1, verbose=1, cv=cv, scoring='%s_macro' % score
        )
        clf.fit(X_train_fold1, y_train_fold1)
    
        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()
    
        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test_fold1, clf.predict(X_test_fold1)
        print(classification_report(y_true, y_pred))
        print()

I have the following error

ValueError: Invalid parameter kernel for estimator VotingClassifier(estimators=[('pipe1',
                              Pipeline(steps=[('preprocessor1',
                                               ColumnTransformer(transformers=[('numerical',
                                                                                Pipeline(steps=[('scaler',
                                                                                                 StandardScaler())]),
                                                                                [2,
                                                                                 5,
                                                                                 7])])),
                                              ('classifier1', SVC())])),
                             ('pipe2',
                              Pipeline(steps=[('preprocessor2',
                                               ColumnTransformer(transformers=[('numerical',
                                                                                Pipeline(steps=[('scaler',
                                                                                                 StandardScaler())]),
                                                                                [0,
                                                                                 1,
                                                                                 4,
                                                                                 5,
                                                                                 7])])),
                                              ('...())])),
                             ('pipe3',
                              Pipeline(steps=[('preprocessor3',
                                               ColumnTransformer(transformers=[('numerical',
                                                                                Pipeline(steps=[('scaler',
                                                                                                 StandardScaler())]),
                                                                                [0,
                                                                                 1,
                                                                                 4,
                                                                                 5,
                                                                                 6])])),
                                              ('classifier3', SVC())])),
                             ('pipe4',
                              Pipeline(steps=[('preprocessor4',
                                               ColumnTransformer(transformers=[('numerical',
                                                                                Pipeline(steps=[('scaler',
                                                                                                 StandardScaler())]),
                                                                                [1,
                                                                                 4])])),
                                              ('classifier4', SVC())]))]). Check the list of available parameters with `estimator.get_params().keys()`.
"""
0

There are 0 best solutions below