Reshape data to be usable for training GCN in PyTorch

152 Views Asked by At

I am trying to build a Graph Convolutional Network (GCN). I converted my dataframe to the format PyTorch requires using the code below.

class S_Dataset(Dataset):
    """Row-wise dataset that turns one dataframe row into graph-style tensors.

    Every item is derived from a single row and is a 4-tuple:

    * ``x`` -- float tensor with 9 entries: the POSIX timestamp of ``date``
      followed by ``s1``, ``s2``, ``s3``, ``s4``, ``temp``, ``rh``,
      ``Location`` and ``Node``.
    * ``y`` -- long tensor with the single value ``Location``.
    * ``edge_index`` -- long tensor of shape ``[2, 3]``; the first row repeats
      ``Location`` and the second row holds the three ``neighborN_name``
      values.
    * ``weight`` -- float tensor with the three ``neighborN_distance`` values,
      in the same order as the columns of ``edge_index``.

    Args:
        df: Object supporting ``len()`` and ``.iloc[idx]`` whose rows expose
            the columns listed above as attributes.
        transform: Optional callable invoked as
            ``transform(x, y, edge_index, weight)``; it must return four
            values, which replace the originals.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Build ``(x, y, edge_index, weight)`` for the row at position ``idx``."""
        record = self.df.iloc[idx]

        # Node features; the timestamp is converted to seconds since the epoch.
        x = torch.tensor(
            [
                record.date.to_pydatetime().timestamp(),
                record.s1,
                record.s2,
                record.s3,
                record.s4,
                record.temp,
                record.rh,
                record.Location,
                record.Node,
            ],
            dtype=torch.float,
        )
        y = torch.tensor([record.Location], dtype=torch.long)

        # One weight per outgoing edge, ordered neighbor1..neighbor3.
        weight = torch.tensor(
            [
                record.neighbor1_distance,
                record.neighbor2_distance,
                record.neighbor3_distance,
            ],
            dtype=torch.float,
        )

        # Row 0: source of every edge (this row's Location).
        # Row 1: the three neighbor targets.
        edge_index = torch.tensor(
            [
                [record.Location, record.Location, record.Location],
                [
                    record.neighbor1_name,
                    record.neighbor2_name,
                    record.neighbor3_name,
                ],
            ],
            dtype=torch.long,
        )

        if self.transform:
            x, y, edge_index, weight = self.transform(x, y, edge_index, weight)

        return x, y, edge_index, weight

# Wrap the dataframe `df` (which must already be defined at this point) in the
# dataset; no transform is passed, so items are returned untransformed.
Process_Data = S_Dataset(df)

Next, I divided the data into training and test sets:

# Use 80% of the samples for training. The test size is obtained by
# subtraction so the two sizes always add up to len(Process_Data), which
# random_split requires.
train_size = int(len(Process_Data) * 0.8)
test_size = len(Process_Data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    Process_Data, [train_size, test_size]
)

# Create dataloaders
# Only the training loader shuffles. Shuffling the evaluation data does not
# change aggregate metrics and makes per-sample results harder to line up
# between runs, so the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

I designed a simple model:

import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv

# Create the model
class Net(nn.Module):
    """Two graph-convolution layers followed by a two-layer dense classifier.

    Channel sizes are fixed: 9 -> 128 -> 64 through the ``GCNConv`` layers,
    then 64 -> 32 -> ``len(location_to_id)`` through the linear layers.
    ``location_to_id`` is a sized collection defined elsewhere and must exist
    before the model is instantiated.
    """

    def __init__(self):
        super().__init__()
        # Graph convolutions.
        self.conv1 = GCNConv(9, 128)
        self.conv2 = GCNConv(128, 64)
        # Dense classification head; one output per entry of location_to_id.
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, len(location_to_id))

    def forward(self, x, edge_index, weight):
        """Run the network and return the raw (un-normalised) class scores.

        Args:
            x: Node feature tensor fed to the first convolution.
            edge_index: Edge index tensor passed to both convolutions.
            weight: Passed as the third positional argument of both
                convolutions (presumably the per-edge weights -- confirm
                against the GCNConv signature in use).
        """
        hidden = torch.relu(self.conv1(x, edge_index, weight))
        hidden = torch.relu(self.conv2(hidden, edge_index, weight))
        # Collapse to rows of 64 features before the dense layers.
        flat = hidden.view(-1, 64)
        return self.fc2(torch.relu(self.fc1(flat)))

Finally, I train the model:

# Model, optimizer and loss used for training.
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

# NOTE(review): each dataset item carries its own edge_index of shape [2, 3];
# with the default DataLoader collation these are presumably stacked along a
# new leading batch dimension before reaching the model -- confirm that this
# is the layout the graph convolutions expect.
num_batches = len(train_loader)
for epoch in range(100):
    batch_losses = []
    for x, y, edge_index, weight in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(x, edge_index, weight), y)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    # Report the mean per-batch loss for this epoch.
    print('Epoch: {} Loss: {:.4f}'.format(epoch, sum(batch_losses) / num_batches))

I am facing the following error:

IndexError: The shape of the mask [2, 3] at index 0 does not match the shape of the indexed tensor [32, 3] at index 0

x, y, edge_index, weight = batch

This line is causing the error. How can I reshape my data so that I can train my model?

1

There are 1 best solutions below

1
On

The batch size is set to 32, but there might not be enough samples to fill a batch of 32. I am assuming this error occurs after the code has run for some time; I would appreciate more context on the problem. A general solution could be to decrease the batch size to something smaller and try the code again, making sure all samples are covered in the epoch.