LSTM : predict_step in PyTorch Lightning

29 Views Asked by At

I've developed code for an LSTM model, but I'm uncertain about how to utilize it for predictions in a production environment. Could you please assist? In the provided predict.py script, I aim to utilize the model to predict the next 14 time periods.

Additionally, I initialize the new_data variable with the last 10 values from the training data.

However, I've encountered an issue: all of the predictions are identical, and the output does not change from one step to the next.

model.py

import lightning as L
import torch
from torch import nn

from torchmetrics.regression import MeanAbsoluteError


class LSTMRegressor(L.LightningModule):
    """
    LSTM followed by a linear head, packaged as a PyTorch Lightning module.

    ``forward`` runs the input through ``nn.LSTM`` (built with
    ``batch_first=True``) and applies ``nn.Linear`` to the LSTM output of the
    last time step, so the result has shape ``(batch, output_size)``.

    Args:
        n_features: Size of the last input dimension (LSTM ``input_size``).
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers.
        learning_rate: Learning rate handed to the Adam optimizer.
        criterion: Callable ``criterion(y_hat, y)`` that returns the loss.
        output_size: Number of values produced by the linear head.
        **kwargs: Accepted for call-site compatibility; not read here.
    """

    def __init__(
        self,
        n_features,
        hidden_size,
        num_layers,
        dropout,
        learning_rate,
        criterion,
        output_size,
        **kwargs,
    ):
        super().__init__()
        self.n_features = n_features
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.criterion = criterion
        # NOTE(review): this also stores ``criterion`` in the hyperparameters;
        # if it is an nn.Module Lightning suggests ignoring it - confirm.
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.mae = MeanAbsoluteError()

        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # nn.LSTM only applies dropout *between* layers; with a single
            # layer a non-zero value has no effect and merely triggers a
            # warning, so pass 0 in that case.
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True,
            bidirectional=False,
        )
        # Fully connected (dense) layer mapping the hidden state to the output.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the prediction for ``x`` of shape ``(batch, seq_len, n_features)``."""
        lstm_out, _ = self.lstm(x)
        # Only the LSTM output of the final time step feeds the linear head.
        return self.fc(lstm_out[:, -1])

    def configure_optimizers(self):
        """Return Adam plus a StepLR scheduler (factor 0.1 every 5 scheduler steps)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
        return {
            "optimizer": optimizer,
            "lr_scheduler": scheduler,
        }

    def training_step(self, batch, batch_idx):
        """Compute the loss on one ``(x, y)`` batch and log it per epoch as ``train_loss``."""
        x, y = batch
        # Call the module rather than ``forward`` so nn.Module hooks run.
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("train_loss", loss, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute loss and MAE on one ``(x, y)`` batch; log ``val_loss`` and ``val_mae``."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        val_mae = self.mae(y_hat, y)
        self.log("val_loss", loss, on_step=False, on_epoch=True)
        self.log("val_mae", val_mae)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute the loss on one ``(x, y)`` batch and log it per epoch as ``test_loss``."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("test_loss", loss, on_step=False, on_epoch=True)
        return loss

    def on_validation_epoch_end(self):
        """
        Mirror the epoch-level ``val_loss`` to the progress bar as ``val_loss_epoch``.

        This logic previously lived in ``on_epoch_end``, a hook that Lightning
        removed in v1.8 and therefore never invoked, so the metric was never
        logged.
        """
        val_loss = self.trainer.callback_metrics.get("val_loss")
        # Skip quietly when ``val_loss`` has not been recorded yet.
        if val_loss is not None:
            self.log("val_loss_epoch", val_loss, on_epoch=True, prog_bar=True)

    def predict_step(self, batch, batch_idx: int, dataloader_idx: int = None):
        """
        Return the model output for one prediction batch.

        ``batch`` may be the input tensor itself or an ``(x, ...)`` tuple/list
        such as the ``(x, y)`` pairs used by the training steps; only the
        first element is used in the latter case.
        """
        print("Running predict_step.")
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        return self(x)

predict.py

import torch
import numpy as np
from model import LSTMRegressor

# from data_set import EpiCountsDataModule
from config import p


# Load the trained weights from the checkpoint. The constructor arguments are
# passed explicitly from the shared config dict ``p``.
model = LSTMRegressor.load_from_checkpoint(
    "./checkpoints/model-epoch=29-val_loss=11.96.ckpt",
    n_features=p["n_features"],
    hidden_size=p["hidden_size"],
    criterion=p["criterion"],
    num_layers=p["num_layers"],
    dropout=p["dropout"],
    learning_rate=p["learning_rate"],
    output_size=p["output_size"],
)
# Inference mode: disables dropout so repeated calls are deterministic.
model.eval()

# Seed window: one row per time step, one column per feature.
# NOTE(review): these are raw values. If the data module scales/normalizes
# the series before training, the same transform must be applied here (and
# inverted on the outputs); a mismatch is a common cause of near-constant
# forecasts - TODO confirm against EpiCountsDataModule.
new_data = np.array([[5.45], [5.43], [5.45], [5.43], [5.36], [5.33], [5.21]])

# Convert to a float tensor and add the batch axis:
# (seq_len, n_features) -> (1, seq_len, n_features).
new_data_tensor = torch.tensor(new_data, dtype=torch.float32).unsqueeze(0)
print(new_data_tensor)

# load_from_checkpoint can restore the model on the accelerator it was trained
# on; keep the input on the same device to avoid a device-mismatch error.
new_data_tensor = new_data_tensor.to(model.device)

# One entry per forecast step.
predictions = []

# Autoregressive roll-out: predict one step, drop the oldest time step from
# the window, append the prediction, and repeat.
with torch.no_grad():
    for _ in range(14):
        prediction = model(new_data_tensor)  # shape: (batch, output_size)
        print(f"Print Predictions : {prediction}")

        # Insert the time axis at dim 1 so the prediction lines up with the
        # window as (batch, 1, output_size). This requires
        # output_size == n_features.
        new_data_tensor = torch.cat(
            (new_data_tensor[:, 1:], prediction.unsqueeze(1)), dim=1
        )
        print(f"Updated new data tensor : {new_data_tensor}")

        predictions.append(prediction)

print(predictions)

train.py

from data_set import EpiCountsDataModule
from lightning import Trainer
from model import LSTMRegressor
from config import p, tensorboard_logger  # , wandb_logger
from callbacks import PrintingCallback, early_stop_callback, checkpoint_callback


def main():
    """
    Train the LSTM regressor, then evaluate it on the test set.

    Every setting is read from the shared ``p`` config dict, so the whole
    experiment configuration lives in one place (handy when reporting
    parameters to experiment-tracking software).
    """
    # Data module: sequence length, batch size and loader workers from config.
    data_module = EpiCountsDataModule(
        seq_len=p["seq_len"],  # type: ignore
        batch_size=p["batch_size"],  # type: ignore
        num_workers=p["num_workers"],  # type: ignore
    )

    # Model: forward the relevant config entries as keyword arguments.
    model_keys = (
        "n_features",
        "hidden_size",
        "criterion",
        "num_layers",
        "dropout",
        "learning_rate",
        "output_size",
    )
    regressor = LSTMRegressor(**{key: p[key] for key in model_keys})

    # Trainer: TensorBoard logging plus printing, early-stopping and
    # checkpoint callbacks.
    trainer_options = {
        "accelerator": "auto",
        "max_epochs": p["max_epochs"],
        "logger": [tensorboard_logger],
        "fast_dev_run": False,
        "num_sanity_val_steps": 1,
        "callbacks": [PrintingCallback(), early_stop_callback, checkpoint_callback],
        "benchmark": True,
    }
    trainer = Trainer(**trainer_options)

    # Fit first, then run the test loop with the same model and data module.
    trainer.fit(regressor, data_module)
    trainer.test(regressor, datamodule=data_module)



# Script entry point: training starts only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    main()
    # To browse the logged runs afterwards: tensorboard --logdir=runs

I'm uncertain about how to utilize it for predictions in a production environment. Could you please assist?

0

There are 0 best solutions below