Environment: Python 3.9.5, torch 1.13.0+cu117, torchvision 0.14.0+cu117.
I am currently training a Convolutional Neural Network (CNN) for an image classification task. I have observed that, throughout training, the test accuracy is consistently higher than the train accuracy, which is the opposite of what I would expect. The network is trained on the MNIST dataset. Here are my training results:
epoch=1, train loss=0.8197974562644958, train acc=0.7494, test loss=0.1455492526292801, test acc=0.9616
epoch=2, train loss=0.7107925415039062, train acc=0.7788333333333334, test loss=0.1208220049738884, test acc=0.9689
epoch=3, train loss=0.6579669713973999, train acc=0.7906666666666666, test loss=0.11497163027524948, test acc=0.9676
epoch=4, train loss=0.6305248141288757, train acc=0.7994333333333333, test loss=0.10593992471694946, test acc=0.97
epoch=5, train loss=0.5982099771499634, train acc=0.80585, test loss=0.09132635593414307, test acc=0.9714
epoch=6, train loss=0.5825754404067993, train acc=0.8125333333333333, test loss=0.09170813113451004, test acc=0.9723
epoch=7, train loss=0.5688086748123169, train acc=0.8155166666666667, test loss=0.08628570288419724, test acc=0.9737
epoch=8, train loss=0.5556393265724182, train acc=0.8193166666666667, test loss=0.08203426003456116, test acc=0.9762
epoch=9, train loss=0.546567976474762, train acc=0.8213833333333334, test loss=0.08405696600675583, test acc=0.9754
epoch=10, train loss=0.5374698638916016, train acc=0.8239333333333333, test loss=0.07133891433477402, test acc=0.9788
epoch=11, train loss=0.5179286599159241, train acc=0.82975, test loss=0.0744888037443161, test acc=0.9792
epoch=12, train loss=0.5131004452705383, train acc=0.8329, test loss=0.07630482316017151, test acc=0.9778
epoch=14, train loss=0.49787914752960205, train acc=0.8366666666666667, test loss=0.07209591567516327, test acc=0.9779
epoch=15, train loss=0.4968840777873993, train acc=0.83475, test loss=0.07035819441080093, test acc=0.9801
epoch=16, train loss=0.4877821207046509, train acc=0.83925, test loss=0.07009950280189514, test acc=0.9777
epoch=17, train loss=0.48330068588256836, train acc=0.84045, test loss=0.06527410447597504, test acc=0.9809
epoch=18, train loss=0.48005640506744385, train acc=0.8404166666666667, test loss=0.06624794006347656, test acc=0.9781
epoch=19, train loss=0.47614845633506775, train acc=0.8418833333333333, test loss=0.07185563445091248, test acc=0.9788
Training code:
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from CNN import CNNmodel
# --- Reproducibility and runtime configuration ---------------------------
SEED = 5
device = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
data_root = Path("data/")
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# --- Transforms ------------------------------------------------------------
# Only the training images are randomly augmented; the test images are just
# converted to tensors.
# NOTE(review): train_step() measures accuracy on these augmented images,
# while test_step() sees unaugmented ones, so the two accuracies are not
# measured on comparable inputs. This is a likely reason for
# "test acc > train acc" -- confirm by evaluating the training set with
# `test_transform`.
train_transform = transforms.Compose([
    transforms.TrivialAugmentWide(num_magnitude_bins=8),
    transforms.ToTensor(),
])
test_transform = transforms.ToTensor()

# --- Datasets (downloaded on first use) --------------------------------------
train_data = datasets.MNIST(
    root=data_root / "train",
    train=True,
    download=True,
    transform=train_transform,
)
test_data = datasets.MNIST(
    root=data_root / "test",
    train=False,
    download=True,
    transform=test_transform,
)

# --- Data loaders ------------------------------------------------------------
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# --- Model, optimiser and loss -----------------------------------------------
# The first dimension of a transformed sample is used as the channel count.
channel_num = train_data[0][0].shape[0]
model = CNNmodel(
    in_shape=channel_num,
    hidden_shape=8,
    out_shape=len(train_data.classes),
).to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()
epochs = 20
def train_step(dataloader, loss_fn, optimizer, model, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Sized iterable yielding ``(X, y)`` batches.
        loss_fn: Callable ``loss_fn(y_pred, y)`` returning a scalar tensor.
        optimizer: Optimiser that updates ``model``'s parameters.
        model: The module being trained; it is left in training mode.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A ``(train_loss, train_acc)`` tuple: the mean of the per-batch
        losses (a tensor detached from the autograd graph) and the mean of
        the per-batch accuracies (a float).

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    # Put mode-dependent layers (dropout, batch norm, ...) in training mode.
    model.train()

    train_loss = 0
    train_acc = 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # Accumulate a detached copy so the running total does not carry
        # autograd history from every batch.
        train_loss = train_loss + loss.detach()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Softmax is monotonic, so argmax over the raw outputs selects the
        # same class as argmax over softmax(outputs).
        y_pred_class = torch.argmax(y_pred, dim=1)
        train_acc += (y_pred_class == y).sum().item() / len(y_pred)

    num_batches = len(dataloader)
    train_loss = train_loss / num_batches
    train_acc = train_acc / num_batches
    return (train_loss, train_acc)
def test_step(dataloader, loss_fn, model, device):
test_loss = 0
test_acc = 0
with torch.inference_mode():
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
y_pred = model(X)
loss = loss_fn(y_pred, y)
test_loss += loss
y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
test_acc += (y_pred_class == y).sum().item()/len(y_pred)
test_loss /= len(dataloader)
test_acc /= len(dataloader)
return (test_loss, test_acc)
# Main loop: one training pass followed by one evaluation pass per epoch.
for epoch in range(epochs):
    train_loss, train_acc = train_step(
        dataloader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        model=model,
        device=device,
    )
    test_loss, test_acc = test_step(
        dataloader=test_dataloader,
        loss_fn=loss_fn,
        model=model,
        device=device,
    )  # this closing parenthesis was missing in the original listing
    torch.cuda.empty_cache()
    print(f"epoch={epoch}, train loss={train_loss}, train acc={train_acc}, test loss={test_loss}, test acc={test_acc}\n")
And here is my model architecture:
class CNNmodel(nn.Module):
    """Small CNN: two conv blocks followed by a linear classifier.

    Each conv block is Conv(3x3, padding 1) -> ReLU -> Conv(3x3, padding 1)
    -> ReLU -> MaxPool(2). The classifier flattens the result and maps it
    to ``out_shape`` outputs.

    Args:
        in_shape: Number of input channels.
        hidden_shape: Number of channels produced by every convolution.
        out_shape: Number of outputs of the final linear layer.
        image_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. Defaults to 28,
            which reproduces the original ``hidden_shape * 7 * 7``
            classifier input size.

    Raises:
        ValueError: If ``image_size`` is too small to survive both pooling
            layers.
    """

    def __init__(self, in_shape, hidden_shape, out_shape, image_size=28) -> None:
        super().__init__()

        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # The 3x3 / stride 1 / padding 1 convolutions keep the spatial size;
        # each 2x2 max-pool halves it (rounding down). Two pools => // 2 // 2.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"image_size {image_size!r} is too small for two 2x2 pooling layers"
            )

        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=in_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_shape * pooled_height * pooled_width,
                out_features=out_shape,
            ),
        )

    def forward(self, x):
        """Return the classifier outputs for a batch of images ``x``."""
        features = self.conv_block_1(x)
        features = self.conv_block_2(features)
        return self.classifier(features)
I thought the problem was in how the dataset is downloaded and set up, but I couldn't find anything wrong there.
I fixed this problem by setting the batch size to 32.