How do I get precision, recall, and F1-score from nltk.NaiveBayesClassifier?


I have a text classification problem with three target variables, and I'm training a separate nltk.NaiveBayesClassifier for each. I wrote a function that takes the training dataset, the test dataset, and the name of the target variable, trains a classifier, and evaluates it on both sets by computing accuracy, precision, recall, and F1-score. The accuracy values come out fine, but for some reason the other three metrics show up as None. How do I fix this?
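Per the nltk.metrics.scores documentation, precision() and f_measure() return None (rather than raising) when the test set is empty, and recall() returns None when the reference set is empty. A minimal standalone sketch of that behaviour, separate from my pipeline:

from nltk.metrics.scores import precision, recall, f_measure

# precision() and f_measure() return None when the test set is empty;
# recall() returns None when the reference set is empty.
print(precision({1, 2, 3}, set()))      # None
print(recall(set(), {1, 2, 3}))         # None
print(f_measure({1, 2}, set()))         # None

# With non-empty sets they return ordinary floats.
print(precision({1, 2, 3}, {2, 3, 4}))  # 0.6666...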

import collections

import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.metrics.scores import precision, recall, f_measure

def train_and_test_classifier(features_train, features_test, target_name):
    # Train the Naive Bayes classifier
    clf = NaiveBayesClassifier.train(features_train)
    
    # Initialize dictionaries to store refsets and testsets for evaluation
    refsets_train = collections.defaultdict(set)
    testsets_train = collections.defaultdict(set)
    refsets_test = collections.defaultdict(set)
    testsets_test = collections.defaultdict(set)
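    # Note: with defaultdict(set), looking up a key that was never added
    # (e.g. refsets_test[some_key]) silently creates and returns an empty
    # set instead of raising KeyError.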
    
    # Keep the (features, label) pairs together for evaluation
    train_set = list(features_train)
    test_set = list(features_test)
    
    # Record each example's index under its true (reference) label
    for i, (_, label) in enumerate(train_set):
        refsets_train[label].add(i)
    for i, (_, label) in enumerate(test_set):
        refsets_test[label].add(i)
    
    # Classify each example and record its index under the predicted label
    for i, (feats, _) in enumerate(train_set):
        observed = clf.classify(feats)
        testsets_train[observed].add(i)
    for i, (feats, _) in enumerate(test_set):
        observed = clf.classify(feats)
        testsets_test[observed].add(i)
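    
    # At this point refsets_* map each true label to the indices carrying
    # it, and testsets_* map each predicted label to the indices the
    # classifier assigned to it.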
    
    # Calculate evaluation metrics for training data
    accuracy_train = nltk.classify.accuracy(clf, train_set)
    precision_train = precision(refsets_train[target_name], testsets_train[target_name])
    recall_train = recall(refsets_train[target_name], testsets_train[target_name])
    f1_train = f_measure(refsets_train[target_name], testsets_train[target_name])
    
    # Calculate evaluation metrics for testing data
    accuracy_test = nltk.classify.accuracy(clf, test_set)
    precision_test = precision(refsets_test[target_name], testsets_test[target_name])
    recall_test = recall(refsets_test[target_name], testsets_test[target_name])
    f1_test = f_measure(refsets_test[target_name], testsets_test[target_name])
    
    # Return metrics as a dictionary
    metrics = {
        f"{target_name} (Train)": [accuracy_train, precision_train, recall_train, f1_train],
        f"{target_name} (Test)": [accuracy_test, precision_test, recall_test, f1_test]
    }
    
    return metrics

# Example usage:
metrics = train_and_test_classifier(function_features_train, function_features_test, "Job Function")
print(metrics)

This prints:

{'Job Function (Train)': [0.9052187628583185, None, None, None], 'Job Function (Test)': [0.9040435558329133, None, None, None]}
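To narrow down where the None values come from, a debugging snippet (hypothetical, assuming it is added temporarily inside train_and_test_classifier just before the metric calculations) can compare the keys that actually exist in the dictionaries against the key used for the lookup:

    # Hypothetical debugging lines, placed just before the metric calls:
    print("refsets_test keys: ", sorted(refsets_test.keys()))
    print("testsets_test keys:", sorted(testsets_test.keys()))
    print("lookup key:        ", target_name)

    # refsets_* and testsets_* are keyed by class labels. If target_name
    # ("Job Function") is not itself one of those labels, indexing the
    # defaultdict creates an empty set, and precision/recall/f_measure
    # return None for empty sets.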