import tensorflow as tf

# Detect hardware
try:
    tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    tpu_resolver = None
gpus = tf.config.experimental.list_logical_devices("GPU")

# Select the appropriate distribution strategy
if tpu_resolver:
    tf.config.experimental_connect_to_cluster(tpu_resolver)
    tf.tpu.experimental.initialize_tpu_system(tpu_resolver)
    strategy = tf.distribute.experimental.TPUStrategy(tpu_resolver)
    print('Running on TPU ', tpu_resolver.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
    print('Running on multiple GPUs ', [gpu.name for gpu in gpus])
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy; works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy; works on CPU and single GPU
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)
Then I loaded my model, which had been saved in HDF5 format.
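The load itself was roughly like this (the path is the same one used in the save call below); note that it happens outside the strategy scope:

# Roughly how the model was loaded -- this runs OUTSIDE strategy.scope()
model = tf.keras.models.load_model('/content/drive/MyDrive/saved models/colorize.h5')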
with strategy.scope():
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

model.fit(ds_train, validation_data=ds_test, epochs=2, verbose=1)
model.save('/content/drive/MyDrive/saved models/colorize.h5')
Then I got this error on model.compile:

Variable (<tf.Variable 'conv2d/kernel:0' shape=(3, 3, 1, 64) dtype=float32, numpy=array(dtype=float32)>) was not created in the distribution strategy scope of (<tensorflow.python.distribute.tpu_strategy.TPUStrategyV2 object at 0x7c70fcd47fa0>). It is most likely because some layers, model, or optimizer was being created outside the distribution strategy scope. Try to make sure your code looks similar to the following.

with strategy.scope():
    model = _create_model()
    model.compile(...)
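If I follow the error message literally, the model would have to be loaded (not just compiled) inside the scope, something like this untested sketch:

# What the error message seems to suggest: load/create the model inside the scope
with strategy.scope():
    model = tf.keras.models.load_model('/content/drive/MyDrive/saved models/colorize.h5')
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])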
I thought the problem might be the HDF5 model format, so I tried the native .keras format instead. But that didn't help.
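Concretely, the .keras attempt was along these lines (the filename here is just illustrative):

# The .keras variant of the save/load -- filename is illustrative
model.save('/content/drive/MyDrive/saved models/colorize.keras')
model = tf.keras.models.load_model('/content/drive/MyDrive/saved models/colorize.keras')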