I'm training on an AMD GPU, so I'm using an alternative framework, tensorflow-directml (which runs on TensorFlow 1.15.8). When I use the latest version of TensorFlow on the CPU, I am able to train the models, but I want to try using the GPU. Please see my model and the error below. The error reports incompatible shapes, [32] vs. [32,528], but I'm not even sure where the 32 is coming from. The class weights I'm using have only 7 entries (matching the 7 outputs of the model).
Model:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
NASNet (Model) (None, 7, 7, 1056) 4269716
_________________________________________________________________
global_average_pooling2d (Gl (None, 1056) 0
_________________________________________________________________
batch_normalization (BatchNo (None, 1056) 4224
_________________________________________________________________
reshape (Reshape) (None, None, 1) 0
_________________________________________________________________
average_pooling1d (AveragePo (None, None, 1) 0
_________________________________________________________________
dropout (Dropout) (None, None, 1) 0
_________________________________________________________________
dense (Dense) (None, None, 128) 256
_________________________________________________________________
dropout_1 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_1 (Dense) (None, None, 7) 903
=================================================================
Total params: 4,275,099
Trainable params: 3,271
Non-trainable params: 4,271,828
_________________________________________________________________
Code for fit:
learning_rate = 0.001
optimizer = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tensorflow.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
with tensorflow.device('/device:DML:0'):
history = model.fit(m_train_ds,
epochs=15,
steps_per_epoch=len(m_train_ds), #steps = 758
validation_data=m_test_ds,
validation_steps=len(m_test_ds), #steps = 190
callbacks=[checkpoint_callback, early_stop],
verbose=1,
class_weight=m_class_weights
)
Error:
Epoch 1/15
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_6600\2019445536.py in <module>
10 callbacks=[checkpoint_callback, early_stop],
11 verbose=1,
---> 12 class_weight=m_class_weights
13 # class_weight=m_class_weights_np
14 )
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
725 max_queue_size=max_queue_size,
726 workers=workers,
--> 727 use_multiprocessing=use_multiprocessing)
728
729 def evaluate(self,
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training_generator.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing)
601 shuffle=shuffle,
602 initial_epoch=initial_epoch,
--> 603 steps_name='steps_per_epoch')
604
605 def evaluate(self,
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
263
264 is_deferred = not model._is_compiled
--> 265 batch_outs = batch_function(*batch_data)
266 if not isinstance(batch_outs, list):
267 batch_outs = [batch_outs]
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
1015 self._update_sample_weight_modes(sample_weights=sample_weights)
1016 self._make_train_function()
-> 1017 outputs = self.train_function(ins) # pylint: disable=not-callable
1018
1019 if reset_metrics:
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\backend.py in __call__(self, inputs)
3474
3475 fetched = self._callable_fn(*array_vals,
-> 3476 run_metadata=self.run_metadata)
3477 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3478 output_structure = nest.pack_sequence_as(
~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\client\session.py in __call__(self, *args, **kwargs)
1470 ret = tf_session.TF_SessionRunCallable(self._session._session,
1471 self._handle, args,
-> 1472 run_metadata_ptr)
1473 if run_metadata:
1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: Incompatible shapes: [32] vs. [32,528]
[[{{node metrics/acc/Equal}}]]
[[loss_3/dense_4_loss/weighted_loss/broadcast_weights/assert_broadcastable/is_valid_shape/has_valid_nonscalar_shape/has_invalid_dims/concat/_7061]]
(1) Invalid argument: Incompatible shapes: [32] vs. [32,528]
[[{{node metrics/acc/Equal}}]]
0 successful operations.
0 derived errors ignored.