While working through the Contextual Bandit tutorial (https://vowpalwabbit.org/docs/vowpal_wabbit/python/latest/tutorials/python_Contextual_bandits_and_Vowpal_Wabbit.html), I wanted to see which action the model would predict when the training dataset itself was passed back to it for prediction. I got the results below, and I don't understand why the model predicted different actions rather than exactly the actions recorded in the training dataset.
For the test DataFrame, I get the same predictions as those shown in the tutorial.
My expectation was that, when asked to predict on the training dataset, the model would return the same actions that appear in that dataset.
Code reference
# Training records: each one carries an action, a cost, a probability and
# three feature values (an empty string marks a feature that is not set).
train_data = [
    dict(action=1, cost=2, probability=0.4, feature1='a', feature2='c', feature3=''),
    dict(action=3, cost=0, probability=0.2, feature1='b', feature2='d', feature3=''),
    dict(action=4, cost=1, probability=0.5, feature1='a', feature2='b', feature3=''),
    dict(action=2, cost=1, probability=0.3, feature1='a', feature2='b', feature3='c'),
    dict(action=3, cost=1, probability=0.7, feature1='a', feature2='d', feature3=''),
]

# Build the frame and replace the default 0-based index with a 1-based
# one named 'index'.
train_df = (
    pd.DataFrame(train_data)
    .assign(index=range(1, len(train_data) + 1))
    .set_index('index')
)
# Test records: feature values only; no action, cost or probability.
test_data = [
    dict(feature1='a', feature2='c', feature3=''),
    dict(feature1='b', feature2='c', feature3=''),
    dict(feature1='a', feature2='', feature3='b'),
    dict(feature1='b', feature2='b', feature3=''),
    dict(feature1='a', feature2='', feature3='b'),
]

# Build the frame and replace the default 0-based index with a 1-based
# one named 'index'.
test_df = (
    pd.DataFrame(test_data)
    .assign(index=range(1, len(test_data) + 1))
    .set_index('index')
)
# Create the contextual bandit workspace; '--cb 4' configures four actions.
model = vowpalwabbit.Workspace('--cb 4', quiet=True)

# Feed every training record to the learner, one example per row.
for _, sample in train_df.iterrows():
    # Labelled example layout:
    #   'action:cost:probability | feature1 feature2 feature3'
    # `!s` forces str() on each value, so the text is built exactly as
    # an explicit str(...) call would build it.
    labelled_example = (
        f"{sample['action']!s}:{sample['cost']!s}:{sample['probability']!s}"
        f" | {sample['feature1']!s} {sample['feature2']!s} {sample['feature3']!s}"
    )
    model.learn(labelled_example)
# Helper used to get predictions for rows of both the training and the
# test dataset.
def calculate_prediction(row, model):
    """Return the model's prediction for the features held in ``row``.

    Args:
        row: Object indexable by 'feature1', 'feature2' and 'feature3'
            (for example a DataFrame row).
        model: Object exposing ``predict(example)``.

    Returns:
        Whatever ``model.predict`` returns for the unlabelled example
        '| feature1 feature2 feature3'.
    """
    # Unlabelled example: only the feature section after the '|' separator.
    unlabelled_example = f"| {row['feature1']} {row['feature2']} {row['feature3']}"
    return model.predict(unlabelled_example)
# Run the helper on every row (axis=1) of each frame and store the result in
# a new 'predicted_action' column.
train_df['predicted_action'] = train_df.apply(
    lambda features: calculate_prediction(features, model),
    axis=1,
)
test_df['predicted_action'] = test_df.apply(
    lambda features: calculate_prediction(features, model),
    axis=1,
)

# Predicted actions on the train dataset (bare expression: shown as the cell
# output when run interactively).
train_df

