Currently, I'm working on an image motion deblurring problem with PyTorch. I have two kinds of images: blurry images (variable = blur_image) that are the input, and the sharp versions of the same images (variable = sharp_image), which should be the output. Now I wanted to try out transfer learning, but I can't get it to work.
Here is the code for my dataloaders:
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
validation_loader = torch.utils.data.DataLoader(valid_dataset,
                                                batch_size=batch_size,
                                                shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
Their shapes:
Trainloader - Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Trainloader - Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
Validationloader - Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Validationloader - Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
Testloader - Shape of blur_image [N, C, H, W]: torch.Size([16, 3, 128, 128])
Testloader - Shape of sharp_image [N, C, H, W]: torch.Size([16, 3, 128, 128]) torch.float32
The way I use transfer learning (I thought that for 'in_features' I had to put in the number of pixels):
model = models.alexnet(pretrained=True)
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 128)
device_string = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_string)
model = model.to(device)
The way I define my training process:
# Define the loss function (MSE was chosen due to the comparison of pixels
# between blurred and sharp images)
criterion = nn.MSELoss()
# Define the optimizer and learning rate
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Learning rate schedule - if the loss value does not improve for 5
# consecutive epochs, the new learning rate will be: previous_rate*0.5
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=5,
    factor=0.5,
    verbose=True
)
def training(model, trainDataloader, epoch):
    """ Function to define the model training

    Args:
        model (Model object): The model that is going to be trained.
        trainDataloader (Dataloader object): Dataloader object of the trainset.
        epoch (Integer): Number of training epochs.
    """
    # Switch the model into training mode
    model.train()
    # Supporting variables to accumulate the loss and PSNR for each epoch
    running_loss = 0.0
    running_psnr = 0.0
    for i, data in tqdm(enumerate(trainDataloader),
                        total=int(len(train_dataset)/trainDataloader.batch_size)):
        blur_image = data[0]
        sharp_image = data[1]
        # Transfer the blurred and sharp image instances to the device
        blur_image = blur_image.to(device)
        sharp_image = sharp_image.to(device)
        # Set the gradients of the tensors to zero
        optimizer.zero_grad()
        outputs = model(blur_image)
        loss = criterion(outputs, sharp_image)
        # Perform backpropagation
        loss.backward()
        # Update the weights
        optimizer.step()
        # Add the loss that was calculated during the training run
        running_loss += loss.item()
        # Calculate the PSNR once per batch
        batch_psnr = psnr(sharp_image, blur_image)
        running_psnr += batch_psnr
    # Display the training loss
    trainings_loss = running_loss/len(trainDataloader.dataset)
    final_psnr = running_psnr/int(len(train_dataset)/trainDataloader.batch_size)
    final_ssim = ssim(sharp_image, blur_image, data_range=1, size_average=True)
    print(f"Trainings loss: {trainings_loss:.5f}")
    print(f"Train PSNR: {final_psnr:.5f}")
    print(f"Train SSIM: {final_ssim:.5f}")
    return trainings_loss, final_psnr, final_ssim
And here is my way to start the training:
train_loss = []
val_loss = []
train_PSNR_score = []
train_SSIM_score = []
val_PSNR_score = []
val_SSIM_score = []
start = time.time()
for epoch in range(nb_epochs):
print(f"Epoch {epoch+1}\n-------------------------------")
train_epoch_loss = training(model, train_loader, nb_epochs)
val_epoch_loss = validation(model, validation_loader, nb_epochs)
train_loss.append(train_epoch_loss[0])
val_loss.append(val_epoch_loss[0])
train_PSNR_score.append(train_epoch_loss[1])
train_SSIM_score.append(train_epoch_loss[2])
val_PSNR_score.append(val_epoch_loss[1])
val_SSIM_score.append(val_epoch_loss[2])
scheduler.step(train_epoch_loss[0])
scheduler.step(val_epoch_loss[0])
end = time.time()
print(f"Took {((end-start)/60):.3f} minutes to train")
But every time I try to run the training, I receive the following error:
0%| | 0/249 [00:00<?, ?it/s]Epoch 1
-------------------------------
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([16, 3, 128, 128])) that is different to the input size (torch.Size([16, 128])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-195-ff0214e227cd> in <module>()
9 for epoch in range(nb_epochs):
10 print(f"Epoch {epoch+1}\n-------------------------------")
---> 11 train_epoch_loss = training(model, train_loader, nb_epochs)
12 val_epoch_loss = validation(model, validation_loader, nb_epochs)
13 train_loss.append(train_epoch_loss[0])
<ipython-input-170-dfa2c212ad23> in training(model, trainDataloader, epoch)
25 optimizer.zero_grad()
26 outputs = model(blur_image)
---> 27 loss = criterion(outputs, sharp_image)
28
29 # Perform backpropagation
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
526
527 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 528 return F.mse_loss(input, target, reduction=self.reduction)
529
530
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in mse_loss(input, target, size_average, reduce, reduction)
2926 reduction = _Reduction.legacy_get_string(size_average, reduce)
2927
-> 2928 expanded_input, expanded_target = torch.broadcast_tensors(input, target)
2929 return torch._C._nn.mse_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
2930
/usr/local/lib/python3.7/dist-packages/torch/functional.py in broadcast_tensors(*tensors)
72 if has_torch_function(tensors):
73 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 74 return _VF.broadcast_tensors(tensors) # type: ignore
75
76
RuntimeError: The size of tensor a (16) must match the size of tensor b (128) at non-singleton dimension 2
Model structure:
AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=128, bias=True)
)
)
I'm a newbie when it comes to PyTorch (and image deblurring in general), so I'm rather confused about the meaning of the error message and how to fix it. I tried changing my parameters and nothing worked. Does anyone have any advice on how to solve this problem?
I would appreciate every input :)
You can't use alexnet for this task, because the output of your model and sharp_image should have the same shape. A convnet encodes your image into an embedding, and fully connected layers cannot convert these embeddings back to an image of the original size: you can't use fully connected layers for decoding. To obtain an output of the same size, you need ConvTranspose2d() layers.
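You can see the shape mismatch with a quick check (a minimal sketch using the model setup from the question):

import torch
from torchvision import models

# The modified AlexNet maps each image to a flat 128-dim vector,
# which cannot be compared pixel-wise with a [N, 3, 128, 128] target.
model = models.alexnet(pretrained=True)
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 128)

dummy = torch.randn(16, 3, 128, 128)
print(model(dummy).shape)  # torch.Size([16, 128]) -- not an image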
Your encoder could look like this (a minimal sketch; the channel sizes and kernel settings are my assumptions):
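import torch.nn as nn

# Encoder: downsamples a 3 x 128 x 128 image into a 256 x 16 x 16 feature map
encoder = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),     # -> 64 x 64 x 64
    nn.ReLU(inplace=True),
    nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),   # -> 128 x 32 x 32
    nn.ReLU(inplace=True),
    nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),  # -> 256 x 16 x 16
    nn.ReLU(inplace=True),
)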
And your decoder should mirror it with ConvTranspose2d() layers so the output is 3 x 128 x 128 again (again a sketch with assumed layer sizes):
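# Decoder: ConvTranspose2d layers upsample the feature map back to 3 x 128 x 128
decoder = nn.Sequential(
    nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),  # -> 128 x 32 x 32
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),   # -> 64 x 64 x 64
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),     # -> 3 x 128 x 128
    nn.Sigmoid(),  # assumes the images are normalized to [0, 1]
)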
You can train your model like this (a minimal sketch that reuses train_loader, device and nb_epochs from the question):
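model = nn.Sequential(encoder, decoder).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(nb_epochs):
    model.train()
    running_loss = 0.0
    for blur_image, sharp_image in train_loader:
        blur_image = blur_image.to(device)
        sharp_image = sharp_image.to(device)
        optimizer.zero_grad()
        outputs = model(blur_image)             # now [N, 3, 128, 128]
        loss = criterion(outputs, sharp_image)  # shapes match, no broadcasting
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}: loss {running_loss/len(train_loader):.5f}")

Because the decoder output has the same shape as sharp_image, MSELoss now compares the images pixel by pixel, which is exactly what the question intended.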
You might want to visit this for getting help with your project.