import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import datasets, layers, models

RANDOM_SEED_CONSTANT = 42  # for reproducibility
tf.random.set_seed(RANDOM_SEED_CONSTANT)

# Prevent NHWC errors: https://www.nuomiphp.com/eplan/en/50125.html
K.set_image_data_format("channels_last")
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0 # Normalize pixel values to be between 0 and 1
# Create a simple CNN
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64,
                       activation='relu',
                       kernel_initializer=tf.keras.initializers.HeNormal(seed=RANDOM_SEED_CONSTANT)))
model.add(layers.Dense(10,
                       kernel_initializer=tf.keras.initializers.HeNormal(seed=RANDOM_SEED_CONSTANT)))
model.summary()  # summary() prints the table itself and returns None, so wrapping it in print() would also print "None"
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# Save the freshly initialized weights so every run can restart from them
model.save_weights('myweights.h5')
# Run 1: train from the saved initial weights
history = model.fit(train_images, train_labels, epochs=1,
                    shuffle=False,
                    validation_data=(test_images, test_labels))

# Run 2: restore the initial weights, then train again
model.load_weights('myweights.h5')
history = model.fit(train_images, train_labels, epochs=1,
                    shuffle=False,
                    validation_data=(test_images, test_labels))

# Run 3: restore the initial weights, then train again
model.load_weights('myweights.h5')
history = model.fit(train_images, train_labels, epochs=1,
                    shuffle=False,
                    validation_data=(test_images, test_labels))
The above three model.fit() calls give me the following results:
1563/1563 [==============================] - 7s 4ms/step - loss: 1.4939 - accuracy: 0.4543 - val_loss: 1.2516 - val_accuracy: 0.5567
1563/1563 [==============================] - 6s 4ms/step - loss: 1.6071 - accuracy: 0.4092 - val_loss: 1.3857 - val_accuracy: 0.4951
1563/1563 [==============================] - 7s 4ms/step - loss: 1.5538 - accuracy: 0.4325 - val_loss: 1.3187 - val_accuracy: 0.5294
What is the reason for this difference? I am trying to understand the sources that can prevent a model's results from being reproduced. Apart from the random seed and the initialization of the dense layers, what else am I missing?
The way you are testing reproducibility is not correct. You need to close the program and rerun it to see whether the results are the same. Otherwise, run 2 depends on events that happened during run 1, and run 3 depends on events that happened during runs 1 and 2.
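As a minimal sketch of that protocol (the file name repro_test.py is hypothetical, and I am assuming the script prints only the final metrics, e.g. history.history, so the comparison is not polluted by per-step timing output): launch the training script in two fresh processes and compare what they print, instead of calling model.fit() repeatedly in one process.

import subprocess

# Each subprocess.run() starts a fresh Python process, so TensorFlow's
# random state is re-initialized from scratch for each run.
run_a = subprocess.run(["python", "repro_test.py"],
                       capture_output=True, text=True).stdout
run_b = subprocess.run(["python", "repro_test.py"],
                       capture_output=True, text=True).stdout

print("identical:", run_a == run_b)  # expected: True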
The reason is that TensorFlow maintains an internal counter for random generation, as stated in the documentation of tf.random.set_seed (emphasis is mine):

"If the global seed is set but the operation seed is not set, we get different results for every call to the random op, but *the same sequence for every re-run of the program*."

If I run only the first run of your program twice, closing the program between each run (in IPython in that case), the results are completely identical, apart from the time taken to perform the computation, which can vary a bit depending on the load on the machine.
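You can see that counter at work with a short sketch, modeled on the example in the tf.random.set_seed documentation (that re-seeding inside one process behaves like a fresh run is an assumption worth verifying on your TensorFlow version):

import tensorflow as tf

tf.random.set_seed(42)
a1 = tf.random.uniform([1])  # first draw after seeding
a2 = tf.random.uniform([1])  # the internal op-seed counter advanced,
                             # so this differs from a1

tf.random.set_seed(42)       # re-seeding resets the counter, like a re-run
b1 = tf.random.uniform([1])  # reproduces a1

print(bool(tf.reduce_all(tf.equal(a1, a2))))  # False
print(bool(tf.reduce_all(tf.equal(a1, b1))))  # True

Within one process, every op that draws randomness advances this counter, so the second model.fit() does not start from the same random state as the first, even though the weights were reloaded.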