I'm training a neural network with the following code:
import matplotlib.pyplot as plt
import numpy as np
import sklearn.utils.class_weight
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(input_length,)),
    tf.keras.layers.Dropout(0.8, seed=42),
    tf.keras.layers.Dense(units=200, activation='relu'),
    tf.keras.layers.Dropout(0.65, seed=42),
    tf.keras.layers.Dense(units=400, activation='relu'),
    tf.keras.layers.Dropout(0.65, seed=42),
    tf.keras.layers.Dense(units=300, activation='relu'),
    tf.keras.layers.Dropout(0.65, seed=42),
    tf.keras.layers.Dense(units=200, activation='relu'),
    #tf.keras.layers.Dense(units=2, activation='softmax')
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])
# create weights for the classes
weights = sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=[x[0] for x in y_train]
)
my_weight = {0: weights[0], 1: weights[1]}
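For reference, 'balanced' gives each class the weight n_samples / (n_classes * n_samples_in_class). A small standalone check with made-up 90/10 labels (y_demo is hypothetical, not the actual y_train):
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

y_demo = np.array([0] * 900 + [1] * 100)  # hypothetical imbalanced labels
w = compute_class_weight(class_weight='balanced', classes=np.unique(y_demo), y=y_demo)
print(dict(enumerate(w)))  # {0: 0.5555..., 1: 5.0}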
model.compile(optimizer='adam',
              #loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              loss='binary_crossentropy',
              metrics=['accuracy'])
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True
)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='./../../../data/models/',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True
)
callbacks = [
    early_stop,
    #model_checkpoint_callback
]
history = model.fit(x_train, y_train,
                    batch_size=16,
                    epochs=500,
                    validation_data=(x_val, y_val),
                    callbacks=callbacks,
                    class_weight=my_weight)
These are the corresponding plots for loss and accuracy, produced by the following code:
def plot_acc(history, eval_ret):
    plt.plot(history.history['accuracy'], label='train_acc', color='red')
    plt.plot(history.history['val_accuracy'], label='val_acc', color='blue')
    plt.axhline(eval_ret[1], label='test_acc', color='green')
    plt.title('Accuracy of the model over training epochs')
    plt.ylabel('Accuracy')
    plt.xlabel('Training epochs')
    plt.legend()
    plt.show()

def plot_loss(history, eval_ret):
    plt.plot(history.history['loss'], label='train_loss', color='red')
    plt.plot(history.history['val_loss'], label='val_loss', color='blue')
    plt.axhline(eval_ret[0], label='test_loss', color='green')
    plt.title('Loss of the model over training epochs')
    plt.ylabel('Loss')
    plt.xlabel('Training epochs')
    plt.legend()
    plt.show()

eval_ret = model.evaluate(x_test, y_test)
plot_loss(history, eval_ret)
plot_acc(history, eval_ret)
When evaluating the loss/accuracy on the three sets, I get the following output:
print('train:', model.evaluate(x_train, y_train, verbose=0))
print('test:', model.evaluate(x_test, y_test, verbose=0))
print('validate:', model.evaluate(x_val, y_val, verbose=0))
What I don't understand:
- Why are the loss and accuracy for the training data worse than for the test/validation data?
- The calculated train accuracy for the final model is 96%, yet the accuracy plot from the training history shows the train accuracy below 60% the entire time. Why is this the case?
Based on this explanation, the dropout layers are responsible: dropout is only applied during training, not during evaluation or inference.
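The difference is easy to see on a single layer. A minimal standalone sketch (not the original model; the shape is chosen only for illustration): in training mode, Keras zeroes a fraction of the inputs and rescales the survivors by 1/(1 - rate), while in inference mode the layer is a no-op.
import tensorflow as tf

drop = tf.keras.layers.Dropout(0.65, seed=42)  # same rate as the hidden layers above
x = tf.ones((1, 10))
print(drop(x, training=True))   # ~65% of entries zeroed, the rest scaled by 1/0.35 ≈ 2.857
print(drop(x, training=False))  # identity: all ones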
This means that with dropout active, the measured performance during training is only around 55%; when all nodes are used (no dropout, as in evaluate()), the performance is much better, around 95%.
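If you want the training history to report metrics that are comparable to the evaluate() numbers (i.e. computed with dropout disabled), one option is to re-evaluate the training set at the end of each epoch with a custom callback. A minimal sketch under that assumption; EvalTrainSet is my own name, not a Keras API:
import tensorflow as tf

class EvalTrainSet(tf.keras.callbacks.Callback):
    """Log train loss/accuracy in inference mode (dropout disabled)."""
    def __init__(self, x, y):
        super().__init__()
        self.x, self.y = x, y

    def on_epoch_end(self, epoch, logs=None):
        loss, acc = self.model.evaluate(self.x, self.y, verbose=0)
        print(f'epoch {epoch}: no-dropout train loss={loss:.4f}, acc={acc:.4f}')

# usage: model.fit(..., callbacks=[early_stop, EvalTrainSet(x_train, y_train)])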