Why is test accuracy much higher than train accuracy while training a CNN?

Question

Why is test accuracy much higher than train accuracy while training a CNN?

64 Views Asked by Dima At 19 October 2024 at 23:52

Python 3.9.5 torch 1.13.0+cu117 torchvision 0.14.0+cu117

I am currently training a Convolutional Neural Network (CNN) for an image classification task. I have observed that during the training process, the test accuracy consistently surpasses the train accuracy, which is contrary to what is expected. The network is trained on the MNIST dataset. Here are my training results:


epoch=1, train loss=0.8197974562644958, train acc=0.7494, test loss=0.1455492526292801, test acc=0.9616

epoch=2, train loss=0.7107925415039062, train acc=0.7788333333333334, test loss=0.1208220049738884, test acc=0.9689

epoch=3, train loss=0.6579669713973999, train acc=0.7906666666666666, test loss=0.11497163027524948, test acc=0.9676

epoch=4, train loss=0.6305248141288757, train acc=0.7994333333333333, test loss=0.10593992471694946, test acc=0.97

epoch=5, train loss=0.5982099771499634, train acc=0.80585, test loss=0.09132635593414307, test acc=0.9714

epoch=6, train loss=0.5825754404067993, train acc=0.8125333333333333, test loss=0.09170813113451004, test acc=0.9723

epoch=7, train loss=0.5688086748123169, train acc=0.8155166666666667, test loss=0.08628570288419724, test acc=0.9737

epoch=8, train loss=0.5556393265724182, train acc=0.8193166666666667, test loss=0.08203426003456116, test acc=0.9762

epoch=9, train loss=0.546567976474762, train acc=0.8213833333333334, test loss=0.08405696600675583, test acc=0.9754

epoch=10, train loss=0.5374698638916016, train acc=0.8239333333333333, test loss=0.07133891433477402, test acc=0.9788

epoch=11, train loss=0.5179286599159241, train acc=0.82975, test loss=0.0744888037443161, test acc=0.9792

epoch=12, train loss=0.5131004452705383, train acc=0.8329, test loss=0.07630482316017151, test acc=0.9778

epoch=14, train loss=0.49787914752960205, train acc=0.8366666666666667, test loss=0.07209591567516327, test acc=0.9779

epoch=15, train loss=0.4968840777873993, train acc=0.83475, test loss=0.07035819441080093, test acc=0.9801

epoch=16, train loss=0.4877821207046509, train acc=0.83925, test loss=0.07009950280189514, test acc=0.9777

epoch=17, train loss=0.48330068588256836, train acc=0.84045, test loss=0.06527410447597504, test acc=0.9809

epoch=18, train loss=0.48005640506744385, train acc=0.8404166666666667, test loss=0.06624794006347656, test acc=0.9781

epoch=19, train loss=0.47614845633506775, train acc=0.8418833333333333, test loss=0.07185563445091248, test acc=0.9788

training code:

from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from CNN import CNNmodel

# Reproducibility and runtime configuration.
SEED = 5
device = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
data_root = Path("data/")

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# NOTE: random augmentation is applied to the training images only; the test
# images are just converted to tensors. Metrics computed on the training
# batches are therefore measured on distorted inputs and are not directly
# comparable with metrics computed on the clean test set.
train_transform = transforms.Compose([
    transforms.TrivialAugmentWide(num_magnitude_bins=8),
    transforms.ToTensor(),
])

test_transform = transforms.ToTensor()

train_data = datasets.MNIST(
    root=data_root / "train",
    train=True,
    download=True,
    transform=train_transform,
)

test_data = datasets.MNIST(
    root=data_root / "test",
    train=False,
    download=True,
    transform=test_transform,
)

train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# The first sample's leading tensor dimension is used as the number of input
# channels for the model.
channel_num = train_data[0][0].shape[0]
model = CNNmodel(
    in_shape=channel_num,
    hidden_shape=8,
    out_shape=len(train_data.classes),
).to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()
epochs = 20

def train_step(dataloader, loss_fn, optimizer, model, device):
train_loss = 0
train_acc = 0

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
    
        y_pred = model(X)
    
        loss = loss_fn(y_pred, y)
        train_loss += loss
        
        optimizer.zero_grad()
    
        loss.backward()
    
        optimizer.step()
    
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)
    
    train_loss /= len(dataloader)
    train_acc /= len(dataloader)
    
    return (train_loss, train_acc)

    def test_step(dataloader, loss_fn, model, device):
test_loss = 0
test_acc = 0

    with torch.inference_mode():
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
    
            y_pred = model(X)
    
            loss = loss_fn(y_pred, y)
            test_loss += loss
    
            y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            test_acc += (y_pred_class == y).sum().item()/len(y_pred)
        
        test_loss /= len(dataloader)
        test_acc /= len(dataloader)
    
    return (test_loss, test_acc)

# Alternate one training pass and one evaluation pass per epoch, logging the
# metrics returned by each.
for epoch in range(epochs):
    train_loss, train_acc = train_step(
        dataloader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        model=model,
        device=device,
    )

    test_loss, test_acc = test_step(
        dataloader=test_dataloader,
        loss_fn=loss_fn,
        model=model,
        device=device,
    )

    torch.cuda.empty_cache()
    print(f"epoch={epoch}, train loss={train_loss}, train acc={train_acc}, test loss={test_loss}, test acc={test_acc}\n")

And here is my model architecture:

class CNNmodel(nn.Module):
    """Small CNN: two conv blocks followed by a linear classifier.

    Each block is conv -> ReLU -> conv -> ReLU -> 2x2 max-pool, using 3x3
    convolutions with stride 1 and padding 1. The classifier flattens the
    features and expects ``hidden_shape * 7 * 7`` of them.

    Args:
        in_shape: Number of input channels.
        hidden_shape: Number of channels produced by every convolution.
        out_shape: Number of output units of the final linear layer.
    """

    def __init__(self, in_shape, hidden_shape, out_shape) -> None:
        super().__init__()

        def conv_relu(channels_in):
            # One 3x3 convolution to ``hidden_shape`` channels plus its ReLU.
            return [
                nn.Conv2d(
                    in_channels=channels_in,
                    out_channels=hidden_shape,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                ),
                nn.ReLU(),
            ]

        self.conv_block_1 = nn.Sequential(
            *conv_relu(in_shape),
            *conv_relu(hidden_shape),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv_block_2 = nn.Sequential(
            *conv_relu(hidden_shape),
            *conv_relu(hidden_shape),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_shape * 7 * 7,
                out_features=out_shape,
            ),
        )

    def forward(self, x):
        """Apply both conv blocks, then the classifier, and return its output."""
        features = self.conv_block_1(x)
        features = self.conv_block_2(features)
        return self.classifier(features)

I thought the problem was in how the dataset is loaded, but I couldn't find anything wrong.

Original Q&A

There is 1 best solution below

**Dima** · Accepted Answer

Dima On 11 November 2023 at 21:18 BEST ANSWER

I fixed this problem by setting batch size to 32.

Why is test accuracy much higher than train accuracy while training a CNN?

There is 1 best solution below

Related Questions in PYTHON

Related Questions in CONV-NEURAL-NETWORK

Related Questions in TORCH

Related Questions in TORCHVISION

Trending Questions

Popular # Hashtags

Popular Questions