I am having difficulty writing a custom loss function that makes use of random weights generated according to the class/state predicted by the Softmax output. The desired properties are:

  • The model is a simple feedforward neural network with an input dimension of 1 and an output dimension of 6.
  • The activation function of the output layer is Softmax, and the actual class/state number is then estimated from its output using Argmax.
  • Note that the training data consist only of X (there is no Y).
  • The loss function is defined in terms of random weights (drawn from a Weibull distribution) that are sampled based on the predicted state number for each input sample X; a small sampling sketch is shown after this list.

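To make the weighting idea concrete, here is a small self-contained sketch (separate from the full example further below) of how a Weibull weight could be drawn only for the samples whose predicted state is 1; the predicted states here are hypothetical placeholders, and the shape/scale values (c=2, scale=4) simply mirror the ones used in my example.

import numpy as np
from scipy.stats import weibull_min

# hypothetical predicted states for four samples (e.g., argmax of the softmax output)
predicted_states = np.array([1, 0, 1, 3])
# one weight slot per sample; it stays zero unless the sample is predicted as state-1
weights = np.zeros(predicted_states.shape[0], dtype=np.float32)
mask = predicted_states == 1
weights[mask] = weibull_min.rvs(c=2, loc=0, scale=4, size=mask.sum()).astype(np.float32)
print(weights)  # non-zero entries only where state-1 was predicted
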
Below, I provide a minimal example for illustration. For simplicity, I define the loss function based only on the random weights for state/class 1. I get: "ValueError: No gradients provided for any variable: ['dense_41/kernel:0', 'dense_41/bias:0', 'dense_42/kernel:0', 'dense_42/bias:0']."

As indicated in the post Getting around tf.argmax which is not differentiable (linked in the code comments below), I found out that argmax is not differentiable and that a softargmax function should help, which I have implemented in the following code. However, I still get the same error.
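
For reference, here is the soft-argmax idea in isolation, as a minimal hypothetical sketch with a single 6-class probability row (the values are placeholders): scaling by a large beta before the softmax pushes the output towards a one-hot vector, so the weighted sum of the class indices approximates argmax while remaining differentiable.

import tensorflow as tf

# minimal soft-argmax sketch: a differentiable approximation of argmax
probs = tf.constant([[0.05, 0.70, 0.10, 0.05, 0.05, 0.05]], dtype=tf.float32)
beta = 1e2                                          # large beta -> softmax output close to one-hot
idx = tf.range(probs.shape[-1], dtype=probs.dtype)  # class indices 0..5
soft_idx = tf.reduce_sum(tf.nn.softmax(beta * probs, axis=-1) * idx, axis=-1)
print(float(soft_idx[0]), int(tf.argmax(probs, axis=-1)[0]))  # approximately 1.0 vs 1

The full example that reproduces the error is below.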

import sys
import time
from tqdm import tqdm
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from scipy.stats import weibull_min

###############################################################################################
# Generate Dataset
lb  = np.array([2.0])   # Left boundary
ub  = np.array([100.0])  # Right boundary
# Data Points - uniformly distributed
N_r = 50
X_r = np.linspace(lb, ub, N_r)    
###############################################################################################
#Define Model
class DGM:
    # Initialize the class
    def __init__(self, X_r): 
        #Normalize training input data
        self.Xmean, self.Xstd = np.mean(X_r), np.std(X_r)
        X_r = (X_r - self.Xmean) / self.Xstd
        self.X_r = X_r
        #Input and output variable dimensions
        self.X_dim = 1; self.Y_dim = 6
        # Define tensors
        self.X_r_tf = tf.convert_to_tensor(X_r, dtype=tf.float32)
        #Learning rate
        self.LEARNING_RATE=1e-4
        #Feedforward neural network model
        self.modelTest = self.test_model()
    ###############################################
    # Initialize network weights and biases 
    def test_model(self):
        input_shape = self.X_dim
        dimensionality = self.Y_dim
        model = tf.keras.Sequential()
        model.add(layers.Input(shape=(input_shape,)))  # Keras expects the shape as a tuple
        model.add(layers.Dense(64, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
        model.add(layers.Activation('tanh'))
        model.add(layers.Dense(dimensionality))
        model.add(layers.Activation('softmax'))
        return model
    ##############################################        
    def compute_loss(self):
        # Define optimizer (note: it is re-created on every call, so its internal state resets each step)
        gen_opt = tf.keras.optimizers.Adam(learning_rate=self.LEARNING_RATE, beta_1=0.0, beta_2=0.9)
        with tf.GradientTape() as test_tape:
            ###### calculate loss
            generated_u = self.modelTest(self.X_r_tf, training=True)
            #number of data
            n_data = generated_u.shape[0] 
            #initialize random weights assuming state-1 at all input samples
            wt1 = weibull_min.rvs(c=2, loc=0, scale=4, size=(n_data, 1)).astype(np.float32)
            wt1 = tf.convert_to_tensor(wt1, dtype=tf.float32)
            #print('-----------sampling done-----------')  
            #determine the actual state using softargmax
            idst = self.softargmax(generated_u)
            idst = tf.reshape(tf.cast(idst, tf.float32),shape=(n_data,1))
            #index state-1
            id1 = tf.constant(0.,dtype=tf.float32)
            #assign weights if predicted state is state-1
            wt1_final = tf.cast(tf.equal(idst, id1), dtype=tf.float32)*wt1
            #final loss
            test_loss = tf.reduce_mean(tf.square(wt1_final)) 
            #print('-----------test loss calcuated-----------')

        # compute gradients of the loss w.r.t. the trainable variables and apply one optimizer step
        gradients_of_modelTest = test_tape.gradient(test_loss,
                                                    self.modelTest.trainable_variables)

        gen_opt.apply_gradients(zip(gradients_of_modelTest, self.modelTest.trainable_variables))

        return test_loss
    # reference: Getting around tf.argmax which is not differentiable
    # https://stackoverflow.com/questions/46926809/getting-around-tf-argmax-which-is-not-differentiable
    def softargmax(self, x, beta=1e10):
        # differentiable approximation of argmax: softmax with a large beta is close to one-hot
        x = tf.convert_to_tensor(x)
        x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
        return tf.reduce_sum(tf.nn.softmax(x * beta, axis=1) * x_range, axis=-1)

    ##############################################
    def train(self,training_steps=100):
        train_start_time = time.time()
        for step in tqdm(range(training_steps), desc='Training'):
            test_loss = self.compute_loss()

            if (step + 1) % 10 == 0:
                elapsed_time = time.time() - train_start_time
                sec_per_step = elapsed_time / (step + 1)
                secs_left = (training_steps - step - 1) * sec_per_step
                tf.print("\nStep # ", step, "/", training_steps,
                         output_stream=sys.stdout)
                tf.print("Elapsed time (s):", elapsed_time, " estimated time left (s):",
                         secs_left, output_stream=sys.stdout)
                tf.print("Test Loss: ", test_loss, output_stream=sys.stdout)
###############################################################################################
#Define and train the model
model = DGM(X_r)
model.train(training_steps=100)
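
For completeness, here is a hypothetical sketch of how I would query the network on new inputs once the gradient issue is resolved, re-using the same normalization that DGM applies to its training data (the X_new values and the hard argmax at inference time are just for illustration):

X_new = np.linspace(2.0, 100.0, 5).reshape(-1, 1).astype(np.float32)
X_new_norm = (X_new - model.Xmean) / model.Xstd           # same normalization as in __init__
probs = model.modelTest(tf.convert_to_tensor(X_new_norm, dtype=tf.float32), training=False)
predicted_states = tf.argmax(probs, axis=-1)              # hard argmax is fine at inference time
print(predicted_states.numpy())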