How can I add a progress-bar so I can make sure my programme is running

176 Views Asked by At

When I run the program I can see that my PC uses more CPU, so something seems to be running in the background. However, I cannot be sure, because the "train" function needs to run for a couple of hours before it displays any results. I would like to add a progress bar that shows how far the run has got. Here is the main body of the code:

import os
import copy 
import numpy as np
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch import optim 
from tqdm import tqdm
from utils import *
from modules import UNet_conditional, EMA
import logging 
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import argparse

# Layout of each log line: timestamp, level name, then the message text.
# The LogRecord attribute is "message" (singular); "%(messages)s" is not a
# known field and makes every log call fail while formatting.
LOG_FORMAT = "%(asctime)s - %(levelname)s: %(message)s"
LOG_DATE_FORMAT = "%I:%M:%S"

logging.basicConfig(format=LOG_FORMAT, level=logging.INFO, datefmt=LOG_DATE_FORMAT)

class Diffusion:
    """Forward-noising and reverse-sampling helper using a linear beta schedule.

    On construction the schedule ``beta`` (length ``noise_steps``) is built and
    moved to ``device``; ``alpha = 1 - beta`` and ``alpha_hat`` (the cumulative
    product of ``alpha``) are derived from it.
    """

    def __init__(self, noise_steps=1000, beta_start=1e-4, beta_end=0.82, img_size=64, device="cuda"):
        """Store the hyper-parameters and precompute the noise schedule.

        Args:
            noise_steps: Number of entries in the beta schedule.
            beta_start: First value of the linear schedule.
            beta_end: Last value of the linear schedule.
            img_size: Height and width of the square images drawn by ``sample``.
            device: Device the schedule tensors and samples are moved to.
        """
        # NOTE(review): beta_end=0.82 looks like a typo for 0.02, the end value
        # commonly used with a linear DDPM schedule -- confirm. The default is
        # left untouched here so existing callers keep their behaviour.
        self.noise_steps = noise_steps
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.img_size = img_size
        self.device = device

        self.beta = self.prepare_noise_schedule().to(device)
        self.alpha = 1. - self.beta
        self.alpha_hat = torch.cumprod(self.alpha, dim=0)

    def prepare_noise_schedule(self):
        """Return ``noise_steps`` values evenly spaced from ``beta_start`` to ``beta_end``."""
        return torch.linspace(self.beta_start, self.beta_end, self.noise_steps)

    def noise_images(self, x, t):
        """Noise ``x`` to the timesteps ``t``.

        Computes ``sqrt(alpha_hat[t]) * x + sqrt(1 - alpha_hat[t]) * eps`` with
        ``eps`` drawn from a standard normal of the same shape as ``x``.

        Args:
            x: Batch of inputs; the per-sample factors are broadcast over three
                trailing dimensions, so a 4-D batch is expected.
            t: Integer tensor with one timestep index per batch element.

        Returns:
            A ``(noised_x, eps)`` tuple.
        """
        alpha_hat_t = self.alpha_hat[t][:, None, None, None]
        sqrt_alpha_hat = torch.sqrt(alpha_hat_t)
        # The noise term is weighted by sqrt(1 - alpha_hat), not sqrt(alpha_hat).
        sqrt_one_minus_alpha_hat = torch.sqrt(1 - alpha_hat_t)
        eps = torch.randn_like(x)
        return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * eps, eps

    def sample_timesteps(self, n):
        """Return ``n`` random integer timesteps in ``[1, noise_steps)``."""
        return torch.randint(low=1, high=self.noise_steps, size=(n,))

    def sample(self, model, n, labels, cfg_scale=3):
        """Generate ``n`` images by running the reverse process with ``model``.

        The model is switched to eval mode for the duration of the call and is
        put back into whichever mode it was in beforehand, even if sampling
        raises.

        Args:
            model: Callable as ``model(x, t, labels)`` returning predicted noise;
                ``labels`` may be ``None`` for the unconditional prediction.
            n: Number of images to generate.
            labels: Conditioning labels passed to ``model``.
            cfg_scale: When greater than 0, the conditional and unconditional
                predictions are blended with ``torch.lerp`` using this weight.

        Returns:
            A ``uint8`` tensor of shape ``(n, 3, img_size, img_size)`` with
            values in ``[0, 255]``.
        """
        logging.info(f"Sampling {n} new images....")
        # Remember the current mode so an eval-mode model (e.g. an EMA copy)
        # is not silently flipped to train mode by sampling.
        was_training = getattr(model, "training", True)
        model.eval()
        try:
            with torch.no_grad():
                x = torch.randn((n, 3, self.img_size, self.img_size)).to(self.device)
                timesteps = range(1, self.noise_steps)
                # reversed(range) has no len(), so give tqdm the total
                # explicitly; otherwise it cannot show a percentage or an ETA.
                for i in tqdm(reversed(timesteps), total=len(timesteps), position=0):
                    t = (torch.ones(n) * i).long().to(self.device)
                    predicted_noise = model(x, t, labels)
                    if cfg_scale > 0:
                        uncond_predicted_noise = model(x, t, None)
                        predicted_noise = torch.lerp(uncond_predicted_noise, predicted_noise, cfg_scale)
                    alpha = self.alpha[t][:, None, None, None]
                    alpha_hat = self.alpha_hat[t][:, None, None, None]
                    beta = self.beta[t][:, None, None, None]
                    # No fresh noise is added on the final step (i == 1).
                    noise = torch.randn_like(x) if i > 1 else torch.zeros_like(x)
                    x = 1 / torch.sqrt(alpha) * (x - ((1 - alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise
                # Map from [-1, 1] to [0, 255] integer pixel values.
                x = (x.clamp(-1, 1) + 1) / 2
                x = (x * 255).type(torch.uint8)
        finally:
            model.train(was_training)
        return x

class ClassTrain:
    """Holder for the conditional-diffusion training loop."""

    @staticmethod
    def train(args):
        """Train the conditional model and periodically sample and checkpoint.

        Declared as a static method so that both ``ClassTrain.train(args)`` and
        ``ClassTrain().train(args)`` work; the loop needs no instance state.

        Args:
            args: Namespace providing ``run_name``, ``device``, ``num_classes``,
                ``lr``, ``image_size`` and ``epochs``; it is also handed to
                ``get_data``, which presumably reads ``batch_size`` and
                ``dataset_path`` -- verify against ``utils``.
        """
        setup_logging(args.run_name)
        device = args.device
        dataloader = get_data(args)
        model = UNet_conditional(num_classes=args.num_classes).to(device)
        optimizer = optim.AdamW(model.parameters(), lr=args.lr)
        mse = nn.MSELoss()
        diffusion = Diffusion(img_size=args.image_size, device=device)
        logger = SummaryWriter(os.path.join("runs", args.run_name))
        steps_per_epoch = len(dataloader)
        ema = EMA(0.995)
        # Frozen copy of the model that is updated via ema.step_ema below.
        ema_model = copy.deepcopy(model).eval().requires_grad_(False)

        for epoch in range(args.epochs):
            logging.info(f"Starting epoch {epoch}:")
            # tqdm prints the per-batch progress bar in the terminal; the
            # description shows which epoch the bar belongs to.
            pbar = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{args.epochs}")
            for i, (images, labels) in enumerate(pbar):
                images = images.to(device)
                labels = labels.to(device)
                t = diffusion.sample_timesteps(images.shape[0]).to(device)
                x_t, noise = diffusion.noise_images(images, t)
                # Drop the labels for roughly 10% of the batches so the model
                # also sees label-free inputs (sampling calls it with None).
                if np.random.random() < 0.1:
                    labels = None
                predicted_noise = model(x_t, t, labels)
                loss = mse(noise, predicted_noise)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                ema.step_ema(ema_model, model)

                loss_value = loss.item()
                pbar.set_postfix(MSE=loss_value)
                # Global step must advance by the number of batches per epoch;
                # multiplying by 1 made steps from different epochs overlap.
                logger.add_scalar("MSE", loss_value, global_step=epoch * steps_per_epoch + i)

            # Every 10th epoch: sample from both models and save checkpoints.
            if epoch % 10 == 0:
                labels = torch.arange(10).long().to(device)
                sampled_images = diffusion.sample(model, n=len(labels), labels=labels)
                ema_sampled_images = diffusion.sample(ema_model, n=len(labels), labels=labels)
                plot_images(sampled_images)
                save_images(sampled_images, os.path.join("results", args.run_name, f"{epoch}.jpg"))
                save_images(ema_sampled_images, os.path.join("results", args.run_name, f"{epoch}_ema.jpg"))
                torch.save(model.state_dict(), os.path.join("models", args.run_name, "ckpt.pt"))
                torch.save(ema_model.state_dict(), os.path.join("models", args.run_name, "ema_ckpt.pt"))
                torch.save(optimizer.state_dict(), os.path.join("models", args.run_name, "optim.pt"))


def launch():
    """Build the hard-coded training configuration and start training.

    The argument parser defines no options; it is only used to obtain a
    namespace object that the settings below are attached to.
    """
    parser = argparse.ArgumentParser()
    args = parser.parse_args()
    args.run_name = "DDPM_conditional"
    args.epochs = 500
    args.batch_size = 12
    args.image_size = 64
    args.num_classes = 10
    args.dataset_path = r"C:\Users\suley\Desktop\finetuning\trainingdata"
    args.device = "cuda"
    args.lr = 3e-4

    my_instance = ClassTrain()
    my_instance.train(args)


# This guard has to sit at module level: nested inside train() it never ran,
# so executing the script defined the classes and exited without training.
if __name__ == '__main__':
    launch()
    # Example: sampling from a saved checkpoint instead of training.
    # device = "cuda"
    # model = UNet_conditional(num_classes=10).to(device)
    # ckpt = torch.load("./models/DDPM_conditional/ckpt.pt")
    # model.load_state_dict(ckpt)
    # diffusion = Diffusion(img_size=64, device=device)
    # n = 8
    # y = torch.Tensor([6] * n).long().to(device)
    # x = diffusion.sample(model, n, y, cfg_scale=0)
    # plot_images(x)

I want to see a progress bar in the terminal that shows the estimated time remaining and a percentage of the progress so far.

1

There is 1 best solution below

0
On

You could use tqdm. When you have an iterable processed in a for loop, you can wrap it in tqdm so that it produces a progress bar. I.e. instead of

# Plain loop: nothing is printed while it iterates.
for i in range(10000):
  ...

You would write:

from tqdm import tqdm

# Same loop, with the iterable wrapped in tqdm so a progress bar is shown.
# The `for` must start at the same indentation as the import; indenting it
# under the import line is an IndentationError.
for i in tqdm(range(10000)):
    ...

To get a progress bar like:

76%|████████████████████████████ | 7568/10000 [00:33<00:10, 229.00it/s]