I want to do some CNN with Pytorch, but I got this error:
RuntimeError: [enforce fail at C:\cb\pytorch_1000000000000\work\c10\core\impl\alloc_cpu.cpp:81] data. DefaultCPUAllocator: not enough memory: you tried to allocate 412876800 bytes.
The dataset I am using is 3410 images consists of 0-9, A-Z, and a-z. I assume the dataset is not that large to the point that I don't have enough RAM to process them.
I read about some possible solutions to reduce batch size, but when I reduce the batch_size to 16 and run the file, nothing happens. Does anyone have any idea how to solve this?
Here is my code:
class WritingDataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
self.annotations = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
image = io.imread(img_path)
y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
if self.transform:
image = self.transform(image)
return (image, y_label)
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
in_channel = 3
num_classes = 2
learning_rate = 1e-3
batch_size = 32
num_epochs = 5
# Load Data
dataset = WritingDataset(
csv_file='english.csv',
root_dir='Img',
transform=transforms.ToTensor()
)
train_set, test_set = torch.utils.data.random_split(dataset, [3000, 410])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)
# Model
model = torchvision.models.googlenet(pretrained=True)
model.to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
losses = []
for batch_idx, (data, targets) in enumerate(train_loader):
# Get data to cuda if possible
data = data.to(device=device)
targets = targets.to(device=device)
# forward
scores = model(data)
loss = criterion(scores, targets)
losses.append(loss.item())
# backward
optimizer.zero_grad()
loss.backward()
# gradient descent or adam step
optimizer.step()
print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")
# Check accuracy on training & test
def check_accuracy(loader, model):
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
_, predictions = scores.max(1)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
)
model.train()
If you have enough VRAM memory, try to increase the pagefile (virtual memory) size. The better option is to set 'System managed', or manually increase it to higher values.