Training and validation losses decreasing slowly

116 Views Asked by At

I have implemented a 2D CNN model followed by a GRU layer:

class CNN2D(nn.Module):
    """Per-frame 2D CNN encoder followed by a GRU classifier for video clips.

    Each frame is passed through four strided conv blocks
    (Conv2d -> BatchNorm2d -> ReLU), flattened, and projected to a
    ``CNN_embed_dim``-sized embedding. The per-frame embeddings are stacked
    along a time axis and fed to a single-layer GRU; the hidden state of the
    last time step is mapped to ``num_classes`` logits.

    Args:
        img_x: Frame height used to pre-compute the conv output size.
        img_y: Frame width used to pre-compute the conv output size.
        fc_hidden1: Stored for backward compatibility; the hidden FC layers
            that used it are currently disabled.
        fc_hidden2: Stored for backward compatibility (see ``fc_hidden1``).
        drop_p: Dropout probability applied to the GRU output.
        CNN_embed_dim: Size of the per-frame embedding fed to the GRU.
        num_classes: Number of output logits.
    """

    def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512, num_classes=9):
        super().__init__()

        self.img_x = img_x
        self.img_y = img_y
        self.CNN_embed_dim = CNN_embed_dim

        # Per-stage conv hyper-parameters: channels, kernels, strides, paddings.
        self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)

        # Spatial size after each conv stage. conv2D_output_size is defined
        # elsewhere in the file; its result is indexed as (dim0, dim1) below.
        self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)
        self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)

        # Kept as attributes for backward compatibility; no layer uses them
        # while the hidden FC layers are disabled.
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        # conv1 expects single-channel (e.g. grayscale) frames.
        self.conv1 = self._conv_block(1, self.ch1, self.k1, self.s1, self.pd1)
        self.conv2 = self._conv_block(self.ch1, self.ch2, self.k2, self.s2, self.pd2)
        self.conv3 = self._conv_block(self.ch2, self.ch3, self.k3, self.s3, self.pd3)
        self.conv4 = self._conv_block(self.ch3, self.ch4, self.k4, self.s4, self.pd4)

        # Not used in forward(); retained so existing code that touches these
        # attributes keeps working. Neither module has parameters.
        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(2)

        # Projects the flattened conv4 feature map straight to the embedding.
        flat_features = self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1]
        self.fc3 = nn.Linear(flat_features, self.CNN_embed_dim)

        self.num_classes = num_classes

        self.gru = nn.GRU(
            input_size=self.CNN_embed_dim,
            hidden_size=256,
            num_layers=1,
            batch_first=True,  # input & output are (batch, time_step, input_size)
        )
        self.gfc2 = nn.Linear(256, self.num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, stride, padding):
        """Build one Conv2d -> BatchNorm2d -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_channels, momentum=0.01),
            nn.ReLU(inplace=True),
        )

    def forward(self, x_3d):
        """Classify a batch of clips.

        Args:
            x_3d: Tensor of shape (batch, channels, frames, height, width);
                ``channels`` must be 1 to match ``conv1``.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized logits.
        """
        cnn_embed_seq = []
        for t in range(x_3d.size(2)):
            # Encode frame t with the shared CNN.
            x = self.conv1(x_3d[:, :, t, :, :])
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.conv4(x)
            x = x.reshape(x.size(0), -1)

            # fc1/fc2 are not defined in __init__ and fc3 is sized for the
            # flattened conv output, so project directly to the embedding.
            x = self.fc3(x)
            cnn_embed_seq.append(x)

        # Stack along dim 1 to get (batch, time, embed) as the GRU expects
        # with batch_first=True.
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=1)

        # Initial hidden state defaults to zeros.
        RNN_out, _ = self.gru(cnn_embed_seq)

        # Use the output of the last time step as the clip representation.
        x = RNN_out[:, -1, :]
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = self.gfc2(x)

        return x

The inputs are videos of shape [batch, channels, frames, height, width]. I used the Adam optimizer with lr=1e-5, weight_decay=5e-5, amsgrad=True, and cross-entropy loss.

The training and validation losses are decreasing slowly and the model is not converging. What should I change?

0

There are 0 best solutions below