The original AdderNet CNN is designed for processing images which are 2 Dimensional (https://arxiv.org/abs/1912.13200) (Github - https://github.com/huawei-noah/AdderNet). I would like to change the architecture of the AdderNet to process 1 dimensional inputs. However, the size of matrices are not completely aligned in my case. Therefore the performance is poor. Can you help me figure out where is the misalignment in my case?
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Function
import math
def adder1d_function(X, W, stride=1, padding=0):
n_filters, d_filter, w_filter = W.size()
n_x, d_x, w_x = X.size() # not changing thrg the process
w_out = (w_x - w_filter + 2 * padding) // stride + 1
w_out = int(w_out)
X_col = torch.nn.functional.unfold(X.view(1, -1, w_x), (1,w_filter), dilation=1, padding=padding, stride=stride).view(n_x, -1, w_out)
X_col = X_col.permute(1,2,0).contiguous().view(X_col.size(1),-1)
W_col = W.view(n_filters, -1) # n_filters independent
out = adder.apply(W_col,X_col)
out = out.view(n_filters, w_out, n_x)
out = out.permute(2, 0, 1).contiguous()
return out
class adder(Function):
@staticmethod
def forward(ctx, W_col, X_col):
ctx.save_for_backward(W_col,X_col)
output = -(W_col.unsqueeze(2)-X_col.unsqueeze(0)).abs().sum(1)
return output
@staticmethod
def backward(ctx,grad_output):
W_col,X_col = ctx.saved_tensors
grad_W_col = ((X_col.unsqueeze(0)-W_col.unsqueeze(2))*grad_output.unsqueeze(1)).sum(2)
grad_W_col = grad_W_col/grad_W_col.norm(p=2).clamp(min=1e-12)*math.sqrt(W_col.size(1)*W_col.size(0))/5
grad_X_col = (-(X_col.unsqueeze(0)-W_col.unsqueeze(2)).clamp(-1,1)*grad_output.unsqueeze(1)).sum(0)
return grad_W_col, grad_X_col
class adder1d(nn.Module):
def __init__(self,input_channel,output_channel,kernel_size, stride=1, padding=0, bias = False):
super(adder1d, self).__init__()
self.stride = stride
self.padding = padding
self.input_channel = input_channel
self.output_channel = output_channel
self.kernel_size = kernel_size
self.adder = torch.nn.Parameter(nn.init.xavier_normal_(torch.randn(output_channel,input_channel,kernel_size))) # delete here kernel_size dim
self.bias = bias
if bias:
self.b = torch.nn.Parameter(nn.init.uniform_(torch.zeros(output_channel)))
def forward(self, x):
output = adder1d_function(x,self.adder, self.stride, self.padding)
if self.bias:
output += self.b.unsqueeze(0).unsqueeze(2)#.unsqueeze(3)
return output
The inputs are of size torch.Size([131070, 1, 5]). Thank you.
The code is adapted to read and process the 1D sequence, but the performance is very bad.