I am trying to build Graph Convolutional Network. I converted my dataframe to PyTorch required format using below code.
class S_Dataset(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
row = self.df.iloc[idx]
x = torch.tensor([row.date.to_pydatetime().timestamp(), row.s1, row.s2, row.s3, row.s4, row.temp ,row.rh, row.Location, row.Node ], dtype=torch.float)
y = torch.tensor([row.Location], dtype=torch.long)
weight1 = torch.tensor([row.neighbor1_distance], dtype=torch.float)
weight2 = torch.tensor([row.neighbor2_distance], dtype=torch.float)
weight3 = torch.tensor([row.neighbor3_distance], dtype=torch.float)
edge_index1 = torch.tensor([[row.Location, row.neighbor1_name]], dtype=torch.long).t()
edge_index2 = torch.tensor([[row.Location, row.neighbor2_name]], dtype=torch.long).t()
edge_index3 = torch.tensor([[row.Location, row.neighbor3_name]], dtype=torch.long).t()
edge_index = torch.cat([edge_index1, edge_index2, edge_index3 ], dim=1)
weight = torch.cat([weight1, weight2, weight3], dim=0)
if self.transform:
x, y, edge_index, weight = self.transform(x, y, edge_index, weight)
return x, y, edge_index, weight
Process_Data = S_Dataset(df)
Next I divided data into train and test set:
train_size = int(len(Process_Data) * 0.8)
test_size = len(Process_Data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(Process_Data, [train_size, test_size])
# Create dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True )
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True )
I designed a simple model:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv
# Create the model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = GCNConv(9, 128)
self.conv2 = GCNConv(128, 64)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, len(location_to_id))
def forward(self, x, edge_index, weight):
x = self.conv1(x, edge_index, weight)
x = torch.relu(x)
x = self.conv2(x, edge_index, weight)
x = torch.relu(x)
x = x.view(-1, 64)
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
Finally to train the model:
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
for epoch in range(100):
total_loss = 0
for batch in train_loader:
optimizer.zero_grad()
x, y, edge_index, weight = batch
y_pred = model(x, edge_index, weight)
loss = criterion(y_pred, y)
loss.backward()
optimizer.step()
total_loss += loss.item()
print('Epoch: {} Loss: {:.4f}'.format(epoch, total_loss / len(train_loader)))
I am facing following error:
IndexError: The shape of the mask [2, 3] at index 0 does not match the shape of the indexed tensor [32, 3] at index 0
x, y, edge_index, weight = batch
This line is causing error. How can I resphae my data so I can train my model?
The batch size is set at 32, but there might not be enough samples to fit in the batch size of 32. I am assuming, this error occurs after the code runs for some time, I would appreciate more context on the problem A general solution could be decreasing the size of batch to something smaller and trying the code again. Making sure all samples are covered in the epoch.