I am trying to implement an ordinal classifier based on the sources below:
- original source: Simple Trick to Train an Ordinal Regression with any Classifier
- updated coding: How to use ordinal classifier?
In my training dataset, I have 5 categories (usefulness level 1 - level 5). However, I only end up with level 1- level 4 usefulness in the prediction for the test dataset using the code above (the answer in How to use ordinal classifier?). I have enough samples in both training (n=1,000) and test (n=15,000) datasets.
from sklearn.base import clone
import numpy as np
# Source:
# 1. https://stackoverflow.com/questions/66486947/how-to-use-ordinal-classifier
# 2. https://towardsdatascience.com/simple-trick-to-train-an-ordinal-regression-with-any-classifier-6911183d2a3c
class OrdinalClassifier():
def __init__(self, clf):
self.clf = clf
self.clfs = {}
def fit(self, X, y):
self.unique_class = np.sort(np.unique(y))
if self.unique_class.shape[0] > 2:
for i in range(self.unique_class.shape[0] - 1):
# for each k - 1 ordinal value we fit a binary classification problem
binary_y = (y > self.unique_class[i]).astype(np.uint8)
clf = clone(self.clf)
clf.fit(X, binary_y)
self.clfs[i] = clf
def predict_proba(self, X):
clfs_predict = {k: v.predict_proba(X) for k, v in self.clfs.items()}
predicted = []
for i, y in enumerate(self.unique_class):
if i == 0:
# V1 = 1 - Pr(y > V1)
predicted.append(1 - clfs_predict[i][:, 1])
elif y in clfs_predict:
# Vi = Pr(y > Vi-1) - Pr(y > Vi)
predicted.append(clfs_predict[i - 1][:, 1] - clfs_predict[i][:, 1])
else:
# Vk = Pr(y > Vk-1)
predicted.append(clfs_predict[i - 1][:, 1])
return np.vstack(predicted).T
def predict(self, X):
return self.unique_class[np.argmax(self.predict_proba(X), axis=1)]
knn = KNeighborsClassifier()
oc = OrdinalClassifier(knn)
oc.fit(X_train, y_train)
oc.predict(X_test)
Stats for each class in the training dataset and the three prediction models