I have a text classification problem with 3 target variables and I'm running a separate nltk.naivebayesclassifier for each. I have a function that trains, tests, and evaluates the training and testing by obtaining the accuracy, precision, recall, and f1-score. For each target variable, it inputs the training dataset, test dataset, and the name of the target variable. For some reason, the latter 3 metrics show up as "None". How do I fix this?
from nltk.classify import NaiveBayesClassifier
import collections
from nltk.metrics.scores import precision, recall, f_measure
def train_and_test_classifier(features_train, features_test, target_name):
# Train the clf
clf = NaiveBayesClassifier.train(features_train)
# Initialize dictionaries to store refsets and testsets for evaluation
refsets_train = collections.defaultdict(set)
testsets_train = collections.defaultdict(set)
refsets_test = collections.defaultdict(set)
testsets_test = collections.defaultdict(set)
# Separate features and labels for training and testing
train_set = [(features, label) for features, label in features_train]
test_set = [(features, label) for features, label in features_test]
# Extract labels for refsets
for i, (_, label) in enumerate(train_set):
refsets_train[label].add(i)
for i, (_, label) in enumerate(test_set):
refsets_test[label].add(i)
# Classify and build testsets
for i, (feats, label) in enumerate(train_set):
observed = clf.classify(feats)
testsets_train[observed].add(i)
for i, (feats, label) in enumerate(test_set):
observed = clf.classify(feats)
testsets_test[observed].add(i)
# Calculate evaluation metrics for training data
accuracy_train = nltk.classify.accuracy(clf, train_set)
precision_train = precision(refsets_train[target_name], testsets_train[target_name])
recall_train = recall(refsets_train[target_name], testsets_train[target_name])
f1_train = f_measure(refsets_train[target_name], testsets_train[target_name])
# Calculate evaluation metrics for testing data
accuracy_test = nltk.classify.accuracy(clf, test_set)
precision_test = precision(refsets_test[target_name], testsets_test[target_name])
recall_test = recall(refsets_test[target_name], testsets_test[target_name])
f1_test = f_measure(refsets_test[target_name], testsets_test[target_name])
# Return metrics as a dictionary
metrics = {
f"{target_name} (Train)": [accuracy_train, precision_train, recall_train, f1_train],
f"{target_name} (Test)": [accuracy_test, precision_test, recall_test, f1_test]
}
return metrics
# Example usage:
metrics = train_and_test_classifier(function_features_train, function_features_test, "Job Function")
print(metrics)
{'Job Function (Train)': [0.9052187628583185, None, None, None], 'Job Function (Test)': [0.9040435558329133, None, None, None]}