I am relatively new to PyTorch, and I want to know why my predicted output tensors always return the same class label. Right now, every prediction is class 4; my class labels are 0, 1, 2, 3, 4, corresponding to the classes 'healthy', 'mild npdr', 'moderate npdr', 'severe npdr' and 'pdr'.
Input:
print("Predicted classes", outputs.argmax(-1))
Output:
Predicted classes tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
Furthermore, when I printed out my output tensor (in an attempt to debug), it showed the same row of logits for every sample in the batch.
Input:
outputs = model(inputs)
print(outputs)
Outputs:
tensor([[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753]],
grad_fn=<AddmmBackward0>)
This is the entire output from the code I provide further down:
class: healthy, num of datapoints: 5382
class: mild npdr, num of datapoints: 2443
class: moderate npdr, num of datapoints: 5292
class: severe npdr, num of datapoints: 873
class: pdr, num of datapoints: 708
Initial shape: torch.Size([1, 3, 144, 144])
Patches shape: torch.Size([1, 324, 128])
Number of parameters in the model: 3658501
>>> Epoch 1 train loss: 1.620927890357764 train accuracy: 0.20062935873447865
>>> Epoch 1 test loss: 1.6050984613273456 test accuracy: 0.06054421768707483
tensor([[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753],
[ 0.1038, 0.2198, 0.3059, -0.1813, 0.0753]],
grad_fn=<AddmmBackward0>)
Predicted classes tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4])
Actual classes tensor([2, 2, 0, 2, 2, 1, 0, 0, 2, 2, 1, 2, 2, 0, 0, 2, 2, 0, 0, 0, 2, 0, 3, 2,
1, 0, 2, 0, 4, 2, 2, 2])
It did the same thing with class 0 until I added class weights. These are my class weights, by the way:
class_weights = []
total_samples = len(dataset)
num_classes = 5
class_counts = [5382, 2443, 5292, 873, 708]  # class label 0 corresponds with 5382, 1 with 2443, etc.
total_classes = sum(class_counts)
for count in class_counts:
    class_weight = total_classes / (num_classes * count)
    class_weights.append(class_weight)
# Convert class weights to tensor
class_weights_tensor = torch.tensor(class_weights, device=device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
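As a sanity check, these are roughly the values that formula produces (a minimal standalone sketch of the same computation, using sum(class_counts) as the denominator exactly as in the snippet above):

# Standalone sanity check of the class-weight formula above (same counts, same denominator).
import torch
from torch import nn

class_counts = [5382, 2443, 5292, 873, 708]
num_classes = len(class_counts)
total = sum(class_counts)  # 14698

# Inverse-frequency weights: the rarer a class, the larger its weight.
weights = [total / (num_classes * count) for count in class_counts]
print([round(w, 2) for w in weights])  # approximately [0.55, 1.2, 0.56, 3.37, 4.15]

class_weights_tensor = torch.tensor(weights)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)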
This is my full code:
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import torch
import os
import matplotlib.pyplot as plt
from torchvision.transforms.functional import to_pil_image
from torch import nn
from einops.layers.torch import Rearrange
from torch import Tensor
from einops import repeat
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch.optim as optim
import numpy as np
import random
import torch.nn.functional as F
from torchvision.transforms import Resize, ToTensor
path_train = r"C:\Users\Sarim&Sahar\OneDrive\Desktop\ViTs for DBRP\data\training_data"
path_test = r"C:\Users\Sarim&Sahar\OneDrive\Desktop\ViTs for DBRP\data\testing_data"
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image = t(image)
        return image, target
#----------------------------COUNTING # OF DATAPOINTS-----------------------------------------
classes = ['healthy', 'mild npdr', 'moderate npdr', 'severe npdr', 'pdr']
for i in classes:
    class_path = os.path.join(path_train, i)
    num_images = len([file for file in os.listdir(class_path) if file.endswith(('jpg', 'jpeg', 'png'))])
    print(f"class: {i}, num of datapoints: {num_images}")
#WE NEED > 1 IMAGE FOR EACH CLASS
#----------------------------------------PATCHING-----------------------------------------------------
from torchvision import transforms
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.images = []
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        for cls in self.classes:
            cls_path = os.path.join(root_dir, cls)
            if os.path.isdir(cls_path):
                cls_images = [os.path.join(cls_path, img) for img in os.listdir(cls_path)]
                self.images.extend([(img, self.class_to_idx[cls]) for img in cls_images])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)
        return image, label
# Define data augmentation transforms
data_transform = transforms.Compose([
    transforms.Resize((144, 144)),
    transforms.ToTensor()
])
# Use data augmentation in the dataset
dataset = CustomDataset(root_dir=path_train, transform=data_transform)
#-----------Einops Reshaping---------------------------------------------
class PatchEmbedding(nn.Module):
    def __init__(self, in_channels=3, patch_size=8, emb_size=128):
        super().__init__()
        self.patch_size = patch_size
        self.projection = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
            nn.Linear((patch_size ** 2) * in_channels, emb_size)
        )
        # Initialize the linear layer weights randomly
        nn.init.xavier_uniform_(self.projection[1].weight)

    def forward(self, x: Tensor) -> Tensor:
        x = self.projection(x)
        return x
#---------------------------------------------------------------------------------------------
label_mapping = {
    0: "healthy",
    1: "mild npdr",
    2: "moderate npdr",
    3: "severe npdr",
    4: "pdr"
}
def show_images(dataset, num_samples=20, cols=4):
    # Get a random subset of indices
    random_dataset = random.sample(list(range(len(dataset))), num_samples)
    plt.figure(figsize=(15, 15))
    for i, idx in enumerate(random_dataset):
        image, target = dataset[idx]
        plt.subplot(int(num_samples/cols) + 1, cols, i + 1)
        plt.imshow(to_pil_image(image[0]))
        plt.colorbar()
        plt.title(label_mapping[target])
        plt.axis('on')
    plt.show()
show_images(dataset)
#-------------------------------------MULTI-HEAD ATTENTION-----------------------------------------------
class Attention(nn.Module):
    def __init__(self, dim, n_heads, dropout):
        super().__init__()
        self.n_heads = n_heads
        self.att = torch.nn.MultiheadAttention(embed_dim=dim, num_heads=n_heads, dropout=0.1)
        self.q = torch.nn.Linear(dim, dim)
        self.k = torch.nn.Linear(dim, dim)
        self.v = torch.nn.Linear(dim, dim)
        self.dropout = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        q = self.q(x)
        k = self.k(x)
        v = self.v(x)
        attn_output, attn_output_weights = self.att(q, k, v)
        attn_output = self.norm(self.dropout(attn_output) + x)
        return attn_output
Attention(dim=128, n_heads=4, dropout=0.1)(torch.ones((1, 5, 128))).shape
#--------------------------------------------------------------------------------------------------
sample_datapoint = torch.unsqueeze(dataset[0][0], 0)
print("Initial shape: ", sample_datapoint.shape)  # 1 = batch dimension, 3 = color channels, spatial dimensions 144 by 144
embedding = PatchEmbedding()(sample_datapoint)
print("Patches shape: ", embedding.shape)  # After applying the patch embedding there are 324 patches, and each patch is embedded into a 128-dimensional vector
# We get the number 324 because number of patches = (image height / patch height) * (image width / patch width),
# AND our initial image shape was set to 144*144. 144 divided by our patch size of 8 equals 18. 18 squared = 324
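To make sure I have that arithmetic right, here is a small standalone check (just a sketch re-using the PatchEmbedding class from above; it is not part of the training script):

# Quick check of the patch-count arithmetic for a 144x144 image with 8x8 patches.
img_size, patch_size = 144, 8
num_patches = (img_size // patch_size) ** 2  # (144 / 8) ** 2 = 18 ** 2 = 324
print(num_patches)  # 324

dummy = torch.ones((1, 3, img_size, img_size))
print(PatchEmbedding()(dummy).shape)  # expected: torch.Size([1, 324, 128])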
#----------------
class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

norm = PreNorm(128, Attention(dim=128, n_heads=4, dropout=0.1))
norm(torch.ones((1, 5, 128))).shape
#==
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(dim, hidden_dim)
        self.activation = nn.GELU()
        self.dropout1 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(hidden_dim, dim)
        self.dropout2 = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        x = self.norm(self.dropout2(self.linear2(self.dropout1(self.activation(self.linear1(x)))) + x))
        return x

ff = FeedForward(dim=128, hidden_dim=256)
ff(torch.ones((1, 5, 128))).shape
class ResidualAdd(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        res = x
        x = self.fn(x, **kwargs)
        x += res
        return x

residual_att = ResidualAdd(Attention(dim=128, n_heads=4, dropout=0.1))
residual_att(torch.ones((1, 5, 128))).shape
class ViT(nn.Module):
    def __init__(self, ch=3, img_size=144, patch_size=8, emb_dim=128, n_layers=24, out_dim=5, dropout=0.1, heads=4):
        super(ViT, self).__init__()

        # Attributes
        self.channels = ch
        self.height = img_size
        self.width = img_size
        self.patch_size = patch_size
        self.n_layers = n_layers

        # Patching
        self.patch_embedding = PatchEmbedding(in_channels=ch,
                                              patch_size=patch_size,
                                              emb_size=emb_dim)
        num_patches = (img_size // patch_size) ** 2
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))  # Positional embedding weights
        self.cls_token = nn.Parameter(torch.rand(1, 1, emb_dim))  # Classification token weights
        # Initialize position embeddings with nn.Parameter
        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))

        # Transformer Encoder
        self.layers = nn.ModuleList([])
        for _ in range(n_layers):
            transformer_block = nn.Sequential(
                ResidualAdd(PreNorm(emb_dim, Attention(emb_dim, n_heads=heads, dropout=dropout))),
                ResidualAdd(PreNorm(emb_dim, FeedForward(emb_dim, emb_dim, dropout=dropout))))
            self.layers.append(transformer_block)

        # Classification head
        self.head = nn.Sequential(nn.LayerNorm(emb_dim), nn.Linear(emb_dim, out_dim))

    def forward(self, img):
        # Get patch embedding vectors
        x = self.patch_embedding(img)
        b, n, _ = x.shape

        # Add cls token to inputs
        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b=b)
        x = torch.cat([cls_tokens, x], dim=1)
        x += self.pos_embedding[:, :(n + 1)]

        # Transformer layers
        for i in range(self.n_layers):
            x = self.layers[i](x)

        # Output based on classification token
        return self.head(x[:, 0, :])
device = "cpu"
model = ViT()
model(torch.ones((1, 3, 144, 144)))
train_split = int(0.8 * len(dataset))
train, test = random_split(dataset, [train_split, len(dataset) - train_split])
train_dataloader = DataLoader(train, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test, batch_size=32, shuffle=False)
correct_predictions = 0
total_samples = 0
optimizer = optim.AdamW(model.parameters(), lr=0.001)
#------------------Weights---------------------------------
class_weights = []
total_samples = len(dataset)
num_classes = 5
class_counts = [5382, 2443, 5292, 1049, 708]  # Replace with actual counts
total_classes = sum(class_counts)
for count in class_counts:
    class_weight = total_samples / (num_classes * count)
    class_weights.append(class_weight)
# Convert class weights to tensor
class_weights_tensor = torch.tensor(class_weights, device=device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
#-----------Training------------------------------------
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
# Count the number of parameters in the model
num_params = count_parameters(model)
print(f"Number of parameters in the model: {num_params}")
num_epochs = 1
for epoch in range(num_epochs):
    train_losses = []
    train_correct_predictions = 0
    train_total_samples = 0

    # Training phase
    model.train()
    for step, (inputs, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Calculate the loss with class weights
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        # Calculate accuracy during training
        _, predicted = torch.max(outputs, 1)
        train_correct_predictions += torch.sum(predicted == labels).item()
        train_total_samples += labels.size(0)

    train_accuracy = train_correct_predictions / train_total_samples

    # Validation/testing phase
    model.eval()
    val_losses = []
    val_correct_predictions = 0
    val_total_samples = 0
    with torch.no_grad():
        for step, (inputs, labels) in enumerate(test_dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_losses.append(loss.item())
            # Calculate accuracy during validation
            _, predicted = torch.max(outputs, 1)
            val_correct_predictions += torch.sum(predicted == labels).item()
            val_total_samples += labels.size(0)

    val_accuracy = val_correct_predictions / val_total_samples

    print(f">>> Epoch {epoch+1} train loss: {np.mean(train_losses)} train accuracy: {train_accuracy}")
    print(f">>> Epoch {epoch+1} test loss: {np.mean(val_losses)} test accuracy: {val_accuracy}")
#eval after training
model.eval()
inputs, labels = next(iter(test_dataloader))
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
probabilities = torch.nn.functional.softmax(outputs, dim=1)
_, predicted_classes = torch.max(probabilities, 1)
print(outputs)
print("Predicted classes", predicted_classes)
print("Actual classes", labels)
#--------------------------------------------
Any help in debugging would be appreciated. Also, please let me know if there is anything wrong with my class weights.
EDIT: What I've found is that my code only ever predicts a single class per run, meaning the predictions are either all 0's, all 1's, all 2's, all 3's, or all 4's.
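To illustrate, this is the kind of quick check I would use to confirm whether the model really outputs the same logits regardless of the input (just a sketch, not part of my training script above; it assumes the trained model is still in memory):

# Illustrative check: if the network has collapsed to a constant output,
# two unrelated random inputs should still produce (nearly) identical logits.
model.eval()
with torch.no_grad():
    out_a = model(torch.rand(1, 3, 144, 144))
    out_b = model(torch.rand(1, 3, 144, 144))

print(out_a)
print(out_b)
print("Max absolute difference:", (out_a - out_b).abs().max().item())
print("Predicted classes:", out_a.argmax(-1).item(), out_b.argmax(-1).item())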