No gradients provided for any variable / custom loss

70 Views Asked by At

OK so I'm trying to implement this custom loss I read in a paper and it's my first custom loss so I might be missing something obvious, but I get the error shown below. From other posts I gather that my loss tensor might be "detached" from my other tf variables? but still I can't fix it. Any ideas? thank you!!

error message

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.utils.vis_utils import plot_model
import math

#padding arrays
input_padding_categories = [[0 for i in range(38)] for j in range(42)]
output_padding = [0 for i in range(19)]

# random data generation for testing
import random
data_size = 7537

inputs = [[[random.randint(0, 1) for i in range(38)] for j in range(42)] for k in range(data_size)]
print("inputs_gen shape")
print(len(inputs))
print(len(inputs[0]))
print(len(inputs[0][0]))

outputs = [[random.randint(0, 1) for i in range(19)] for j in range(data_size)]
print("outputs_gen shape")
print(len(outputs))
print(len(outputs[0]))

batch = 32
timesteps = 42
training_units = 0.85

cutting_point = int(len(inputs)*training_units)
x_train, x_test = inputs[:cutting_point], inputs[cutting_point:]
y_train, y_test = outputs[:cutting_point], outputs[cutting_point:]

temp = 0
while temp < len(x_train):
    temp += 32
while len(x_train) < temp:
    x_train.append(input_padding_categories)
    y_train.append(output_padding)
temp = 0
while temp < len(x_test):
    temp += 32
while len(x_test) < temp:
    x_test.append(input_padding_categories)
    y_test.append(output_padding)
x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

def custom_loss_comprende(y, yhat):
    loss = [0.0 for i in range(y.shape[0])]
    for i in range(y.shape[0]):
        loss[i] -= np.dot(y[i] , [math.log(z,10) for z in [x + 1/10**9 for x in yhat[i]]])
        loss[i] -= np.dot([1 + x for x in y[i]] , [math.log(z,10) for z in [1 - x + 1/10**9 for x in yhat[i]]])
    loss = tf.convert_to_tensor(loss)
    return loss

input_layer = keras.Input(shape=(timesteps,38),batch_size=batch)
hidden_layer_1 = layers.LSTM(units=150, batch_input_shape=(batch,timesteps,42) , activation="sigmoid", return_sequences=True)(input_layer)
hidden_layer_2 = layers.LSTM(150, activation="sigmoid", return_sequences=False)(hidden_layer_1)
output_layer = layers.Dense(19, activation="softmax")(hidden_layer_2)
model = keras.Model(inputs=input_layer, outputs=output_layer, name="my_model")
model.summary()
plot_model(model, show_shapes=True, show_layer_names=True)

model.compile(
    loss=custom_loss_comprende,
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
    run_eagerly=True
)

h = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test), shuffle=True)
0

There are 0 best solutions below