Using PyTorch, I have an ANN model (for a classification task) below:
import torch
import torch.nn as nn
# Setting up artifical neural net model which separates out categorical
# from continuous features, so that embedding could be applied to
# categorical features
class TabularModel(nn.Module):
# Initialize parameters embeds, emb_drop, bn_cont and layers
def __init__(self, emb_szs, n_cont, out_sz, layers, p=0.5):
super().__init__()
self.embeds = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in emb_szs])
self.emb_drop = nn.Dropout(p)
self.bn_cont = nn.BatchNorm1d(n_cont)
# Create empty list for each layer in the neural net
layerlist = []
# Number of all embedded columns for categorical features
n_emb = sum((nf for ni, nf in emb_szs))
# Number of inputs for each layer
n_in = n_emb + n_cont
for i in layers:
# Set the linear function for the weights and biases, wX + b
layerlist.append(nn.Linear(n_in, i))
# Using ReLu activation function
layerlist.append(nn.ReLU(inplace=True))
# Normalised all the activation function output values
layerlist.append(nn.BatchNorm1d(i))
# Set some of the normalised activation function output values to zero
layerlist.append(nn.Dropout(p))
# Reassign number of inputs for the next layer
n_in = i
# Append last layer
layerlist.append(nn.Linear(layers[-1], out_sz))
# Create sequential layers
self.layers = nn.Sequential(*layerlist)
# Function for feedforward
def forward(self, x_cat_cont):
x_cat = x_cat_cont[:,0:cat_train.shape[1]].type(torch.int64)
x_cont = x_cat_cont[:,cat_train.shape[1]:].type(torch.float32)
# Create empty list for embedded categorical features
embeddings = []
# Embed categorical features
for i, e in enumerate(self.embeds):
embeddings.append(e(x_cat[:,i]))
# Concatenate embedded categorical features
x = torch.cat(embeddings, 1)
# Apply dropout rates to categorical features
x = self.emb_drop(x)
# Batch normalize continuous features
x_cont = self.bn_cont(x_cont)
# Concatenate categorical and continuous features
x = torch.cat([x, x_cont], 1)
# Feed categorical and continuous features into neural net layers
x = self.layers(x)
return x
I am trying to use this model with skorch's GridSearchCV, as below:
from skorch import NeuralNetBinaryClassifier
# Random seed chosen to ensure results are reproducible by using the same
# initial random weights and biases, and applying dropout rates to the same
# random embedded categorical features and neurons in the hidden layers
torch.manual_seed(0)
net = NeuralNetBinaryClassifier(module=TabularModel,
module__emb_szs=emb_szs,
module__n_cont=con_train.shape[1],
module__out_sz=2,
module__layers=[30],
module__p=0.0,
criterion=nn.CrossEntropyLoss,
criterion__weight=cls_wgt,
optimizer=torch.optim.Adam,
optimizer__lr=0.001,
max_epochs=150,
device='cuda'
)
from sklearn.model_selection import GridSearchCV
param_grid = {'module__layers': [[30], [50,20]],
'module__p': [0.0, 0.2, 0.4],
'max_epochs': [150, 175, 200, 225]
}
models = GridSearchCV(net, param_grid, scoring='roc_auc').fit(cat_con_train.cpu(), y_train.cpu())
models.best_params_
But when I ran the code, I am getting this error message below:
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
FitFailedWarning)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
FitFailedWarning)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
FitFailedWarning)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
FitFailedWarning)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:536: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
FitFailedWarning)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-86-c408d65e2435> in <module>()
98
---> 99 models = GridSearchCV(net, param_grid, scoring='roc_auc').fit(cat_con_train.cpu(), y_train.cpu())
100
101 models.best_params_
11 frames
/usr/local/lib/python3.6/dist-packages/skorch/classifier.py in infer(self, x, **fit_params)
303 raise ValueError(
304 "Expected module output to have shape (n,) or "
--> 305 "(n, 1), got {} instead".format(tuple(y_infer.shape)))
306
307 y_infer = y_infer.reshape(-1)
ValueError: Expected module output to have shape (n,) or (n, 1), got (128, 2) instead
I am not sure what is wrong or how to fix this. Any help on this would really be appreciated.
Many thanks in advance!
To quote ptrblck on the pytorch forum who outlined the solution already:
His assesment was correct. skorch's
NeuralNetBinaryClassifier
expects they
to have one dimension, so a shape of(x, 1)
or(x,)
where the value ofy
is either 0 or 1. So a validy
would be: