I am implementing PyTorch-like modules (for educational purposes) and ran a simple training routine to check them. However, the loss oscillates and I am not sure why.
Below is the code. The training loop comes first and the layer implementations follow it, so you may need to rearrange the blocks to run it locally.
Data Generation
import numpy as np
import matplotlib.pyplot as plt

# These are the parameters that we want to learn
parameters = np.array([1.3, 0.0])

def make_data(N, a, b, *, noise=0.1, x_min=0.0, x_max=1.0):
    X = np.random.rand(N) * (x_max - x_min) + x_min
    X = X.reshape(-1, 1)
    y = X * a + b + np.random.randn(N, 1) * noise
    X_line = np.array([x_min, x_max])
    y_line = X_line * a + b
    return (X, y), (X_line, y_line)

(X, y), (Xline, yline) = make_data(50, *parameters, noise=0.05)
(X_validation, y_validation), _ = make_data(50, *parameters, noise=0.05)

plt.scatter(X, y)
plt.scatter(X_validation, y_validation, alpha=0.5)
plt.plot(Xline, yline)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
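For reference, both splits come out as column vectors:
print(X.shape, y.shape)                        # (50, 1) (50, 1)
print(X_validation.shape, y_validation.shape)  # (50, 1) (50, 1)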
Training loop (see below for module implementations)
criterion = MSELoss()
model = Sequential(
    Linear(1, 5, bias=True),
    ReLU(),
    Linear(5, 1, bias=True),
)

num_epochs = 1000
sgd_params = {
    'learning_rate': 1e-3,
    'weight_decay': 0.0,
    'schedule_scale': 1.0,
}
history = {
    'train': {
        'loss': [],
        'epoch': [],
    },
    'validation': {
        'loss': [],
        'epoch': [],
    },
}

for epoch in range(num_epochs):
    with TrainingContext(model, criterion) as tc:
        # Forward pass
        y_hat = model(X)
        loss = criterion(y_hat, y)
        # Backward pass in reverse order
        dL = criterion.backward(loss)
        model.backward(dL)
        # Update parameters
        model.update(**sgd_params)
        criterion.update(**sgd_params)

    # Scheduler
    sgd_params['learning_rate'] = sgd_params['learning_rate'] * sgd_params['schedule_scale']
    history['train']['epoch'].append(epoch)
    history['train']['loss'].append(loss)

    # Validation
    y_hat = model(X_validation)
    loss = criterion(y_hat, y_validation)
    history['validation']['epoch'].append(epoch)
    history['validation']['loss'].append(loss)

    # Tracking
    if (epoch + 1) % 100 == 0:
        print(f'{epoch+1} / {num_epochs}: Training: {history["train"]["loss"][-1]:.2e} Validation: {history["validation"]["loss"][-1]:.2e}')

plt.plot(history['train']['epoch'], history['train']['loss'], label='Training')
plt.plot(history['validation']['epoch'], history['validation']['loss'], label='Validation')
plt.legend()
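When debugging, I also probe the gradient magnitudes for a single step, roughly like this (it pokes directly at the internal _grad dict and zeroes it afterwards so the probe does not leak into a later update):
with TrainingContext(model, criterion):
    y_hat = model(X)
    loss = criterion(y_hat, y)
    model.backward(criterion.backward(loss))
    for idx, mod in enumerate(model.modules):
        if isinstance(mod, Linear):
            print(idx, np.abs(mod._grad['weight']).max())
            mod.zero_grad()   # don't let this probe leak into the next update
    criterion.zero_grad()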
If I change the model to
model = Sequential(
    Linear(1, 1, bias=True),
)
Module definitions
Base module, Sequential wrapper, and Training context manager
class Module:
    def __init__(self):
        self._save_for_backward = {}
        self._grad = {}
        self.is_training = False  # Don't set this manually

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def zero_grad(self):
        # print(f'===> DEBUG: zero_grad')
        if not self.is_training:
            raise RuntimeError('Please run zero_grad inside the training context')
        self._grad = {}

    def reset(self):
        self.zero_grad()
        self._save_for_backward = {}

    def save_for_backward(self, *args, **kwargs):
        r'''Saves or retrieves anything needed for training
        - If called with a positional argument ==> Returns saved value
        - If called with a keyword argument (with value assignment) ==> Saves the value
        '''
        if len(args) > 0 and len(kwargs) > 0:
            raise ValueError('Cannot save for backward and retrieve at the same time')
        elif len(args) == 0 and len(kwargs) == 0:
            return self._save_for_backward
        elif len(args) > 0:
            result = []
            for arg in args:
                result.append(self._save_for_backward[arg])
            if len(result) == 1:
                return result[0]
            else:
                return result
        elif self.is_training:
            for key, value in kwargs.items():
                self._save_for_backward[key] = value
        return None

    def update(self, *args, **kwargs):
        pass
class Sequential(Module):
    def __init__(self, *modules):
        self.modules = modules
        super().__init__()

    def forward(self, X, *args, **kwargs):
        for mod in self.modules:
            X = mod(X)
        return X

    def backward(self, dLdy):
        grad = dLdy
        # print(grad.shape)
        for mod in self.modules[::-1]:
            grad = mod.backward(grad)
            # print(grad.shape)
        return grad

    def update(self, *args, **kwargs):
        for mod in self.modules:
            mod.update(*args, **kwargs)

    @property
    def is_training(self):
        is_training = []
        for mod in self.modules:
            is_training.append(mod.is_training)
        return is_training

    @is_training.setter
    def is_training(self, value):
        if not isinstance(value, (list, tuple)):
            value = [value] * len(self.modules)
        for idx, mod in enumerate(self.modules):
            mod.is_training = value[idx]
class TrainingContext:
    r'''Makes sure the modules are in the training mode

    Usage:
        with TrainingContext(layer1, layer2, loss) as tc:
            ...
    '''
    def __init__(self, *modules, reset_on_exit=False):
        self.modules = modules
        self.old_states = []
        self.reset_on_exit = reset_on_exit

    def __enter__(self):
        for mod in self.modules:
            self.old_states.append(mod.is_training)
            mod.is_training = True
        return self

    def __exit__(self, *args, **kwargs):
        for idx, mod in enumerate(self.modules):
            mod.is_training = self.old_states[idx]
            if self.reset_on_exit:
                mod.reset()
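To make the save_for_backward convention concrete, this is how I use it (a throwaway example on a bare Module, not part of the training code):
mod = Module()
with TrainingContext(mod):
    mod.save_for_backward(x=np.array([1.0, 2.0]), k=2)  # keyword arguments: save
    x, k = mod.save_for_backward('x', 'k')              # positional arguments: retrieve
    print(x, k)  # [1. 2.] 2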
MSE Loss and ReLU
class MSELoss(Module):
    def forward(self, y_hat, y):
        diff = y_hat - y
        self.save_for_backward(diff=diff, k=len(y))
        diff_sq = diff * diff
        return 0.5 * diff_sq.mean()

    def backward(self, loss):
        diff = self.save_for_backward('diff')
        k = self.save_for_backward('k')
        self._grad['loss'] = self._grad.get('loss', np.zeros_like(diff))
        self._grad['loss'] += diff / k
        return self._grad['loss']


class ReLU(Module):
    def forward(self, X):
        zeromask = X <= 0.0
        self.save_for_backward(zeromask=zeromask)
        y = X.copy()
        y[zeromask] = 0.0
        return y

    def backward(self, dLdy):
        dLdX = dLdy.copy()
        zeromask = self.save_for_backward('zeromask')
        dLdX[zeromask] = 0.0
        return dLdX
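For reference, the gradient I intend in MSELoss.backward: with loss = 0.5 * mean((y_hat - y)**2) over k samples (and a single output column, as here), d(loss)/d(y_hat_i) = (y_hat_i - y_i) / k, which is exactly the diff / k that backward returns.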
Linear Layer
class Linear(Module):
    def __init__(self, Cin, Cout, bias=True):
        super().__init__()
        self.Cin = Cin
        self.Cout = Cout
        self.weight = np.random.randn(Cin, Cout)
        self.bias = np.zeros(Cout) if bias else None

    def forward(self, X):
        # print(f'===> DEBUG: forward')
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if self.is_training:
            self.save_for_backward(X=X.copy())
        y = X @ self.weight
        return y

    def backward(self, dLdy):
        # dLdy.shape = N x Cout
        # dydw.shape = N x Cin
        # print(f'===> DEBUG: backward')
        if not self.is_training:
            raise RuntimeError('Please run backward inside the training context')
        dydX = self.weight.T
        dLdX = dLdy @ dydX
        dydw = self.save_for_backward('X')
        self._grad['weight'] = self._grad.get('weight', np.zeros_like(self.weight))
        self._grad['weight'] += dydw.T @ dLdy
        if self.bias is not None:
            self._grad['bias'] = self._grad.get('bias', np.zeros_like(self.bias))
            self._grad['bias'] += dLdy.sum(0)
        return dLdX

    def update(self, learning_rate=1e-3, weight_decay=1e-4, zero_grad=True, *args, **kwargs):
        # print(f'===> DEBUG: update')
        if not self.is_training:
            raise RuntimeError('Please run update inside the training context')
        self.weight -= learning_rate * (self._grad['weight'] + weight_decay * self.weight)
        if self.bias is not None:
            self.bias -= learning_rate * (self._grad['bias'] + weight_decay * self.bias)
        if zero_grad:
            self.zero_grad()
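To convince myself the analytic gradients in Linear.backward are right, I ran a finite-difference check along these lines (a standalone sanity check with made-up names like lin and X_chk, not part of the training run above):
lin = Linear(3, 1, bias=True)
crit = MSELoss()
X_chk = np.random.rand(10, 3)
y_chk = np.random.rand(10, 1)
with TrainingContext(lin, crit):
    loss = crit(lin(X_chk), y_chk)
    lin.backward(crit.backward(loss))        # fills lin._grad['weight']
    analytic = lin._grad['weight'].copy()
    eps = 1e-6
    numeric = np.zeros_like(lin.weight)
    for i in range(lin.weight.shape[0]):
        for j in range(lin.weight.shape[1]):
            lin.weight[i, j] += eps
            numeric[i, j] = (crit(lin(X_chk), y_chk) - loss) / eps
            lin.weight[i, j] -= eps
    print(np.abs(analytic - numeric).max())  # should be tiny, on the order of eps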


