Stacking classifiers (sklearn and keras models) via StackingCVClassifier problem

638 Views Asked by At

I am fairly new to both the mlxtend and Keras packages, so please bear with me. I have been trying to combine the predictions of several models (Random Forest, Logistic Regression, and a neural network) using StackingCVClassifier, with each classifier operating on a different feature subset. The code is as follows.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import layers
from keras.constraints import maxnorm
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation,  Flatten, Input
from mlxtend.classifier import StackingCVClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.pipeline import make_pipeline
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.neural_network import MLPClassifier

X, y = make_classification()  # synthetic classification data set (library defaults)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)  # hold out 20% for validation

# defining neural network model
def create_model():
    """Return the compiled network: 10 inputs -> Dense(10, relu) -> Dropout -> sigmoid unit."""
    model = Sequential()
    model.add(Dense(10, input_dim=10, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(units=1, activation='sigmoid'))  # single-unit binary output
    optimizer = keras.optimizers.RMSprop(learning_rate=0.001)  # `lr` is the deprecated alias
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model

# Wrap the Keras network in the scikit-learn estimator API (5 epochs, batches of 5).
NN_clf=KerasClassifier(build_fn=create_model, epochs=5, batch_size= 5)
NN_clf._estimator_type = "classifier"  # tag the wrapper as a classifier (scikit-learn reads this attribute)

# One pipeline per base learner; ColumnSelector restricts each learner to its own column range.
pipeline1 = make_pipeline(ColumnSelector(cols=(np.arange(0, 5, 1))), LogisticRegression())  # columns 0-4
pipeline2 = make_pipeline(ColumnSelector(cols=(np.arange(5, 10, 1))), RandomForestClassifier())  # columns 5-9
pipeline3 = make_pipeline(ColumnSelector(cols=(np.arange(10, 20, 1))), NN_clf)  # columns 10-19 (10 columns, as input_dim=10 expects)

# final stacking
clf = StackingCVClassifier(classifiers=[pipeline1, pipeline2, pipeline3], meta_classifier=MLPClassifier())
clf.fit(X_train, y_train)

print("Stacking model score: %.3f" % clf.score(X_val, y_val))

However, I am getting this error:

ValueError                                Traceback (most recent call last)
<ipython-input-11-ef342536824f> in <module>
     42 # final stacking
     43 clf = StackingCVClassifier(classifiers=[pipeline1, pipeline2, pipeline3], meta_classifier=MLPClassifier())
---> 44 clf.fit(X_train, y_train)
     45 
     46 print("Stacking model score: %.3f" % clf.score(X_val, y_val))

~\anaconda3\lib\site-packages\mlxtend\classifier\stacking_cv_classification.py in fit(self, X, y, groups, sample_weight)
    282                 meta_features = prediction
    283             else:
--> 284                 meta_features = np.column_stack((meta_features, prediction))
    285 
    286         if self.store_train_meta_features:

~\anaconda3\lib\site-packages\numpy\core\overrides.py in column_stack(*args, **kwargs)

~\anaconda3\lib\site-packages\numpy\lib\shape_base.py in column_stack(tup)
    654             arr = array(arr, copy=False, subok=True, ndmin=2).T
    655         arrays.append(arr)
--> 656     return _nx.concatenate(arrays, 1)
    657 
    658 

~\anaconda3\lib\site-packages\numpy\core\overrides.py in concatenate(*args, **kwargs)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 3 dimension(s)

Please help me. Thanks!

2

There are 2 best solutions below

0
On BEST ANSWER

The error occurs because you are combining predictions from traditional ML models with predictions from a DL model.

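The ML models produce predictions of shape (80,1), whereas the DL model produces predictions of shape (80,1,1), so the arrays cannot be stacked column-wise. The extra dimension appears because the Keras wrapper's predict already returns an (80,1) column rather than a flat (80,) vector, and mlxtend then adds one more axis with np.newaxis.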

A common workaround is to strip the extra dimension from the DL model's predictions so that they have shape (80,1) instead of (80,1,1).

So, open the py file located inside: anaconda3\lib\site-packages\mlxtend\classifier\stacking_cv_classification.py

At lines 280 and 356, immediately after (and outside of) the if/elif block, add this line:

prediction = prediction.squeeze(axis=1) if len(prediction.shape)>2 else prediction

So, it will look something like this:

...
...
...
# Shape this classifier's prediction before it is appended to the meta-features.
if not self.use_probas:
    # Label predictions: add a trailing axis so the array can be stacked as a column.
    prediction = prediction[:, np.newaxis]
elif self.drop_proba_col == "last":
    # Probability predictions: drop the last column.
    prediction = prediction[:, :-1]
elif self.drop_proba_col == "first":
    # Probability predictions: drop the first column.
    prediction = prediction[:, 1:]
# Added line: if the array has more than 2 dimensions (e.g. (n, 1, 1) from the
# Keras model), squeeze out axis 1 so it matches the 2-D output of the other models.
prediction = prediction.squeeze(axis=1) if len(prediction.shape)>2 else prediction

# First classifier starts the meta-feature matrix; later ones are appended as columns.
if meta_features is None:
    meta_features = prediction
else:
    meta_features = np.column_stack((meta_features, prediction))
...
...
...

# Collect one prediction array per fitted base classifier.
for model in self.clfs_:
    if not self.use_probas:
        # Label predictions: add a trailing axis so each model contributes a column.
        prediction = model.predict(X)[:, np.newaxis]
    else:
        # Probability predictions, optionally dropping the last or first column.
        if self.drop_proba_col == "last":
            prediction = model.predict_proba(X)[:, :-1]
        elif self.drop_proba_col == "first":
            prediction = model.predict_proba(X)[:, 1:]
        else:
            prediction = model.predict_proba(X)
    # Added line: if the array has more than 2 dimensions (e.g. (n, 1, 1) from the
    # Keras model), squeeze out axis 1 so every entry in per_model_preds is 2-D.
    prediction = prediction.squeeze(axis=1) if len(prediction.shape)>2 else prediction
    per_model_preds.append(prediction)
...
...
...

0
On

Prakash's answer raises really good points.

If you want to get this running without too many changes, you can roll your own version of a scikit-learn BaseEstimator/ClassifierMixin object, or wrap in the recommended KerasClassifier object.

i.e. You can roll your own estimator like this:

class MyKerasModel(BaseEstimator, ClassifierMixin):
    """Minimal scikit-learn style wrapper around a small Keras binary classifier.

    ``predict`` returns a flat vector of labels, so its output can be
    column-stacked with the predictions of ordinary scikit-learn estimators.
    """

    def fit(self, X, y):
        """Build, compile and train the network on ``X`` and ``y``.

        The input width is taken from ``X.shape[1]``, so the network adapts to
        however many feature columns it receives. Training uses the Keras
        ``fit`` defaults. The trained network is stored on ``self.model``.

        Returns:
            self, following the scikit-learn estimator convention.
        """
        model = keras.Sequential()
        # Keras documents `shape` as a tuple that excludes the batch axis.
        model.add(layers.Input(shape=(X.shape[1],)))
        # No `input_dim` here: the Input layer already fixes the width, and a
        # hard-coded 10 would contradict data with a different column count.
        model.add(layers.Dense(10, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1, activation='sigmoid'))
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
        model.fit(X, y)
        self.model = model
        return self

    def predict(self, X):
        """Return hard 0/1 labels as a 1-D integer array.

        The sigmoid output is thresholded at 0.5, cast from bool to int so the
        labels have the same kind as the training targets, and flattened from
        ``(n, 1)`` to ``(n,)``.
        """
        return (self.model.predict(X) > 0.5).astype(int).flatten()

And putting all the pieces together allows you to stack the predictions:

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from mlxtend.classifier import StackingCVClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Synthetic classification data (library defaults); hold out 20% for validation with a fixed split seed.
X, y = make_classification()
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

class MyKerasModel(BaseEstimator, ClassifierMixin):
    """Minimal scikit-learn style wrapper around a small Keras binary classifier.

    ``predict`` returns a flat vector of labels, so its output can be
    column-stacked with the predictions of ordinary scikit-learn estimators.
    """

    def fit(self, X, y):
        """Build, compile and train the network on ``X`` and ``y``.

        The input width is taken from ``X.shape[1]``, so the network adapts to
        however many feature columns it receives. Training uses the Keras
        ``fit`` defaults. The trained network is stored on ``self.model``.

        Returns:
            self, following the scikit-learn estimator convention.
        """
        model = keras.Sequential()
        # Keras documents `shape` as a tuple that excludes the batch axis.
        model.add(layers.Input(shape=(X.shape[1],)))
        # No `input_dim` here: the Input layer already fixes the width, and a
        # hard-coded 10 would contradict data with a different column count.
        model.add(layers.Dense(10, activation='relu'))
        model.add(layers.Dropout(0.2))
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1, activation='sigmoid'))
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
        model.fit(X, y)
        self.model = model
        return self

    def predict(self, X):
        """Return hard 0/1 labels as a 1-D integer array.

        The sigmoid output is thresholded at 0.5, cast from bool to int so the
        labels have the same kind as the training targets, and flattened from
        ``(n, 1)`` to ``(n,)``.
        """
        return (self.model.predict(X) > 0.5).astype(int).flatten()

# Stack two scikit-learn models and the Keras wrapper under an MLP meta-classifier.
# `.fit(...)` is chained onto the constructor, so `clf` is bound to whatever fit returns.
clf = StackingCVClassifier(
    classifiers=[RandomForestClassifier(), LogisticRegression(), MyKerasModel()],
    meta_classifier=MLPClassifier(),
).fit(X_train, y_train)
# Report the stacked model's score on the held-out validation split.
print("Stacking model score: %.3f" % clf.score(X_val, y_val))

Output:

2/2 [==============================] - 0s 11ms/step - loss: 0.8580 - auc: 0.5050 - accuracy: 0.5500
2/2 [==============================] - 0s 1ms/step
2/2 [==============================] - 0s 4ms/step - loss: 0.6955 - auc_1: 0.5777 - accuracy: 0.5750
2/2 [==============================] - 0s 1ms/step
3/3 [==============================] - 0s 3ms/step - loss: 0.7655 - auc_2: 0.6037 - accuracy: 0.6125
Stacking model score: 1.000