Why does my confusion matrix look like this?

22 views · asked on Stack Overflow

This is my predictive modeling code for fraud detection on the AML dataset

import torch
import torch.nn as nn
import torch.utils.data as Data
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE


class MyDataset(Data.Dataset):
    """Load the processed AML transactions CSV (features + label in the last column).

    NOTE(review): applying SMOTE to the WHOLE dataset here — before the
    timestamp-based train/val/test split done by the caller — leaks data.
    Synthetic minority rows are interpolations of real fraud rows, so
    near-duplicates of training samples end up in validation/test, which is
    why the reported metrics trend toward perfect.  Oversample only the
    training split instead (keep ``oversample=False`` here and run SMOTE on
    the training subset after splitting).
    """

    def __init__(self, file_path, oversample=True):
        df = pd.read_csv(file_path)

        # Cast explicitly: df.values is an object array when columns mix
        # dtypes, and torch.tensor() cannot convert object arrays.
        X = df.values[:, :-1].astype(np.float32)
        y = df.values[:, -1].astype(np.int64)

        if oversample:
            # WARNING: oversampling before splitting causes data leakage
            # (see class docstring).  Kept on by default for backward
            # compatibility with the original behavior.
            smote = SMOTE()
            X, y = smote.fit_resample(X, y)

        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        # Number of (possibly resampled) rows.
        return len(self.X)

    def __getitem__(self, index):
        # Returns (feature_vector, label) for one sample.
        return self.X[index], self.y[index]


class MLP(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Returns raw logits (no softmax), as expected by nn.CrossEntropyLoss.
    Attribute names (fc1/relu/fc2) are kept so saved state dicts remain
    compatible.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Single expression: project to hidden size, apply ReLU, project to
        # class logits.
        return self.fc2(self.relu(self.fc1(x)))


def calculate_metrics(predictions, targets):
    """Compute accuracy, recall, precision, F1 score and the confusion matrix.

    Note the argument order: *predictions* first, *targets* second — each
    sklearn call internally receives them as (y_true, y_pred), i.e.
    (targets, predictions).
    """
    return (
        accuracy_score(targets, predictions),
        recall_score(targets, predictions),
        precision_score(targets, predictions),
        f1_score(targets, predictions),
        confusion_matrix(targets, predictions),
    )


# Define the training function
def train(train_loader, val_loader, model, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, validating after each epoch.

    Args:
        train_loader: DataLoader over the training split.
        val_loader: DataLoader over the validation split.
        model: the network being optimized (modified in place).
        criterion: loss function, e.g. nn.CrossEntropyLoss (expects logits).
        optimizer: optimizer over model.parameters().
        num_epochs: number of full passes over train_loader.

    Side effects: updates model weights and prints validation metrics and
    the confusion matrix each epoch.
    """
    for epoch in range(num_epochs):
        model.train()
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_predictions = []
            val_targets = []
            for inputs, labels in val_loader:
                outputs = model(inputs)
                # torch.argmax replaces the legacy
                # `_, predicted = torch.max(outputs.data, 1)` idiom; direct
                # `.data` access is discouraged in modern PyTorch.
                predicted = torch.argmax(outputs, dim=1)
                val_predictions.extend(predicted.tolist())
                val_targets.extend(labels.tolist())

            val_predictions = np.array(val_predictions)
            val_targets = np.array(val_targets)

            accuracy, recall, precision, f1, cm = calculate_metrics(val_predictions, val_targets)

            print('Epoch [{}/{}], Validation Accuracy: {:.2f}%, Recall: {:.2f}, Precision: {:.2f}, F1 Score: {:.2f}'
                  .format(epoch + 1, num_epochs, accuracy * 100, recall, precision, f1))
            print('Confusion Matrix:\n', cm)


file_path = 'D:/Program Files/JetBrains/Finance/AML/transactions_processed.csv'
dataset = MyDataset(file_path)

# Chronological split on the timestamp feature (second-to-last column of X).
# NOTE(review): SMOTE already ran on the FULL dataset inside MyDataset, so
# synthetic rows interpolated from training-period samples can land in the
# val/test periods.  That leakage is why validation/test metrics trend toward
# 100% — oversample only the training subset instead.
threshold_train = 119
threshold_val = 159

# Vectorized masks replace the original per-sample Python loop + torch.stack;
# the resulting partition is identical.
timestamps = dataset.X[:, -2]
train_mask = timestamps < threshold_train
val_mask = (timestamps >= threshold_train) & (timestamps < threshold_val)
test_mask = timestamps >= threshold_val

train_dataset = Data.TensorDataset(dataset.X[train_mask], dataset.y[train_mask])
val_dataset = Data.TensorDataset(dataset.X[val_mask], dataset.y[val_mask])
test_dataset = Data.TensorDataset(dataset.X[test_mask], dataset.y[test_mask])

train_loader = Data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = Data.DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# input_size must equal the number of feature columns in the CSV (7 here —
# TODO confirm against the file's actual column count).
model = MLP(input_size=7, hidden_size=64, num_classes=2)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(train_loader, val_loader, model, criterion, optimizer, num_epochs=10)

# Final evaluation on the held-out test split.
model.eval()
with torch.no_grad():
    test_predictions = []
    test_targets = []
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Modern replacement for `_, predicted = torch.max(outputs.data, 1)`.
        predicted = torch.argmax(outputs, dim=1)
        test_predictions.extend(predicted.tolist())
        test_targets.extend(labels.tolist())

    test_predictions = np.array(test_predictions)
    test_targets = np.array(test_targets)

    # Calculate metrics for the test set
    accuracy, recall, precision, f1, cm = calculate_metrics(test_predictions, test_targets)
    print('Test Accuracy: {:.2f}%, Recall: {:.2f}, Precision: {:.2f}, F1 Score: {:.2f}'
          .format(accuracy * 100, recall, precision, f1))
    print('Confusion Matrix:\n', cm)

Here's my output:

Epoch [1/10], Validation Accuracy: 97.14%, Recall: 0.94, Precision: 1.00, F1 Score: 0.97 Confusion Matrix: [[266144 0] [ 14861 238894]]

Epoch [2/10], Validation Accuracy: 83.31%, Recall: 1.00, Precision: 0.75, F1 Score: 0.85 Confusion Matrix: [[179397 86747] [ 0 253755]]

Epoch [3/10], Validation Accuracy: 90.43%, Recall: 0.80, Precision: 1.00, F1 Score: 0.89 Confusion Matrix: [[266144 0] [ 49763 203992]]

Epoch [4/10], Validation Accuracy: 99.66%, Recall: 1.00, Precision: 0.99, F1 Score: 1.00 Confusion Matrix: [[264383 1761] [ 0 253755]]

Epoch [5/10], Validation Accuracy: 93.68%, Recall: 1.00, Precision: 0.89, F1 Score: 0.94 Confusion Matrix: [[233307 32837] [ 0 253755]]

Epoch [6/10], Validation Accuracy: 98.91%, Recall: 0.98, Precision: 1.00, F1 Score: 0.99 Confusion Matrix: [[266144 0] [ 5681 248074]]

Epoch [7/10], Validation Accuracy: 99.16%, Recall: 0.98, Precision: 1.00, F1 Score: 0.99 Confusion Matrix: [[266144 0] [ 4360 249395]]

Epoch [8/10], Validation Accuracy: 99.78%, Recall: 1.00, Precision: 1.00, F1 Score: 1.00 Confusion Matrix: [[266144 0] [ 1121 252634]]

Epoch [9/10], Validation Accuracy: 99.97%, Recall: 1.00, Precision: 1.00, F1 Score: 1.00 Confusion Matrix: [[266144 0] [ 147 253608]]

Epoch [10/10], Validation Accuracy: 100.00%, Recall: 1.00, Precision: 1.00, F1 Score: 1.00 Confusion Matrix: [[266144 0] [ 0 253755]]

Test Accuracy: 99.97%, Recall: 1.00, Precision: 1.00, F1 Score: 1.00 Confusion Matrix: [[272648 0] [ 155 295508]]

I'm trying to figure out why the false positives (FP) and false negatives (FN) keep alternating between zero and large values across epochs.

0

There are 0 best solutions below