TensorFlow GradientTape returns None


I'm trying to compute gradients using GradientTape in TensorFlow.

Description -

  1. A - tf.constant

  2. X - tf.Variable

  3. Y - tf.Variable

Functions

  1. get_regularization_loss - computes the L1/L2 penalty

  2. construct_loss_function - computes the loss

  3. get_gradients - autodiffs the loss and computes the gradients with respect to X & Y

Currently I'm getting None for both X and Y. Any suggestions on what might be wrong?

import tensorflow as tf

def get_regularization_loss(X, loss_info):
    penalty = loss_info['penalty_type']
    alpha = loss_info['alpha']

    #Extract sub matrix
    X_00, X_10, X_01, X_11 = loss_info['X_start_row'], loss_info['X_end_row'], loss_info['X_start_col'], loss_info['X_end_col']

    if penalty == 'L2':
        loss_regularization_X = get_L2_penalty(X[X_00:X_10, X_01:X_11], alpha)
    elif penalty == 'L1':
        loss_regularization_X = get_L1_penalty(X[X_00:X_10, X_01:X_11], alpha)
    else:
        loss_regularization_X = tf.Variable(0, dtype=tf.float64)

    return loss_regularization_X


def construct_loss_function(A, X, Y, loss_info):
    #Extract sub matrix
    A_00, A_10, A_01, A_11 = loss_info['A_start_row'], loss_info['A_end_row'], loss_info['A_start_col'], loss_info['A_end_col']
    X_00, X_10, X_01, X_11 = loss_info['X_start_row'], loss_info['X_end_row'], loss_info['X_start_col'], loss_info['X_end_col']
    Y_00, Y_10, Y_01, Y_11 = loss_info['Y_start_row'], loss_info['Y_end_row'], loss_info['Y_start_col'], loss_info['Y_end_col']

    loss_name = loss_info['loss']
    if loss_name == 'binary_crossentropy':
        exp_value = tf.math.exp(tf.matmul(X[X_00:X_10, X_01:X_11],Y[Y_00:Y_10, Y_01:Y_11]))
        log_odds = exp_value/(1+exp_value)
        loss = tf.reduce_sum(tf.keras.losses.binary_crossentropy(A[A_00:A_10, A_01:A_11], log_odds))
    else:
        loss = tf.Variable(0, dtype=tf.float64)

    return loss


def get_gradients(A, X, Y, Z_loss_list, X_loss_list, Y_loss_list):
    Z_loss = tf.Variable(0, dtype=tf.float64)
    X_loss = tf.Variable(0, dtype=tf.float64)
    Y_loss = tf.Variable(0, dtype=tf.float64)

    with tf.GradientTape(persistent=True) as tape:
        tape.watch(X)
        tape.watch(Y)
        for loss_info in Z_loss_list:
            Z_loss.assign(Z_loss + construct_loss_function(A, X, Y, loss_info))
        
        for loss_info in X_loss_list:
            X_loss.assign(X_loss + get_regularization_loss(X, loss_info))

        for loss_info in Y_loss_list:
            Y_loss.assign(Y_loss + get_regularization_loss(Y, loss_info))
        
        loss = X_loss + Y_loss + Z_loss

    return_dictionary = {
        'total_loss': loss,
        'Z_loss': Z_loss,
        'loss_regularization_X': X_loss,
        'loss_regularization_Y': Y_loss,
        'gradients': tape.gradient(loss, {'X': X, 'Y': Y})
    }
    return return_dictionary

print(get_gradients(A, X, Y, Z_loss_list, X_loss_list, Y_loss_list))
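(Note: get_L1_penalty and get_L2_penalty are not defined in the posted code. Presumably they are alpha-scaled norms, along the lines of the sketch below; the names match the question, but the formulas are assumptions, not code from the question:)

import tensorflow as tf

def get_L1_penalty(X, alpha):
    # Assumed definition: alpha * sum of absolute values over the block.
    return alpha * tf.reduce_sum(tf.abs(X))

def get_L2_penalty(X, alpha):
    # Assumed definition: alpha * sum of squares over the block.
    return alpha * tf.reduce_sum(tf.square(X))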

Output - the gradients entry prints as {'X': None, 'Y': None} (the original output was posted as an image).


There is 1 answer below.


Try using all of the values of the X and Y tensors at these lines:

exp_value = tf.math.exp(tf.matmul(X[X_00:X_10, X_01:X_11], Y[Y_00:Y_10, Y_01:Y_11]))

loss_regularization_X = get_L2_penalty(X[X_00:X_10, X_01:X_11], alpha)

Instead of slicing X and Y, you can fill the other values with large negative numbers so that they can't affect the value of the loss, and then use the entire X variable.
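(A minimal sketch of keeping the whole variable in the differentiable path - here using a 0/1 mask over the block rather than the large-negative fill, since a multiplicative mask guarantees the entries outside the block contribute nothing to the penalty; the shape, block bounds, and alpha are made-up placeholders:)

import tensorflow as tf

X = tf.Variable(tf.random.normal((4, 4), dtype=tf.float64))
X_00, X_10, X_01, X_11 = 0, 2, 0, 2   # sub-matrix block, placeholder bounds
alpha = 0.01

# 0/1 mask: 1 inside the block, 0 elsewhere.
block = tf.ones((X_10 - X_00, X_11 - X_01), dtype=X.dtype)
mask = tf.pad(block, [[X_00, 4 - X_10], [X_01, 4 - X_11]])

with tf.GradientTape() as tape:
    # L2 penalty over the block only, computed from the entire variable.
    penalty = alpha * tf.reduce_sum(tf.square(X * mask))

print(tape.gradient(penalty, X))  # dense gradient; zero outside the block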