Invalid argument: Incompatible shapes: [32] vs. [32,528] when using tensorflow-directml (TensorFlow 1.15)

31 Views Asked by At

I'm training on an AMD GPU, so I'm using an alternative framework, tensorflow-directml (which runs on TensorFlow 1.15.8). When I use the latest TensorFlow version on CPU, I am able to train the model, but I want to try using the GPU. Please see my model and the error below. The error shows [32] vs. [32,528], but I'm not even sure where the 32 is coming from. The class weights I'm using have only 7 entries (one per output class).

Model:

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
NASNet (Model)               (None, 7, 7, 1056)        4269716   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1056)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1056)              4224      
_________________________________________________________________
reshape (Reshape)            (None, None, 1)           0         
_________________________________________________________________
average_pooling1d (AveragePo (None, None, 1)           0         
_________________________________________________________________
dropout (Dropout)            (None, None, 1)           0         
_________________________________________________________________
dense (Dense)                (None, None, 128)         256       
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 128)         0         
_________________________________________________________________
dense_1 (Dense)              (None, None, 7)           903       
=================================================================
Total params: 4,275,099
Trainable params: 3,271
Non-trainable params: 4,271,828
_________________________________________________________________

Code for fit:

learning_rate = 0.001
optimizer = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tensorflow.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

with tensorflow.device('/device:DML:0'):
    history = model.fit(m_train_ds,
                        epochs=15,
                        steps_per_epoch=len(m_train_ds), #steps = 758
                        validation_data=m_test_ds,
                        validation_steps=len(m_test_ds), #steps = 190
                        callbacks=[checkpoint_callback, early_stop],
                        verbose=1,
                        class_weight=m_class_weights
                       )

Error:

Epoch 1/15
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_6600\2019445536.py in <module>
     10                         callbacks=[checkpoint_callback, early_stop],
     11                         verbose=1,
---> 12                         class_weight=m_class_weights
     13 #                         class_weight=m_class_weights_np
     14                        )

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    725         max_queue_size=max_queue_size,
    726         workers=workers,
--> 727         use_multiprocessing=use_multiprocessing)
    728 
    729   def evaluate(self,

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training_generator.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing)
    601         shuffle=shuffle,
    602         initial_epoch=initial_epoch,
--> 603         steps_name='steps_per_epoch')
    604 
    605   def evaluate(self,

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    263 
    264       is_deferred = not model._is_compiled
--> 265       batch_outs = batch_function(*batch_data)
    266       if not isinstance(batch_outs, list):
    267         batch_outs = [batch_outs]

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
   1015       self._update_sample_weight_modes(sample_weights=sample_weights)
   1016       self._make_train_function()
-> 1017       outputs = self.train_function(ins)  # pylint: disable=not-callable
   1018 
   1019     if reset_metrics:

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\keras\backend.py in __call__(self, inputs)
   3474 
   3475     fetched = self._callable_fn(*array_vals,
-> 3476                                 run_metadata=self.run_metadata)
   3477     self._call_fetch_callbacks(fetched[-len(self._fetches):])
   3478     output_structure = nest.pack_sequence_as(

~\AppData\Roaming\Python\Python37\site-packages\tensorflow_core\python\client\session.py in __call__(self, *args, **kwargs)
   1470         ret = tf_session.TF_SessionRunCallable(self._session._session,
   1471                                                self._handle, args,
-> 1472                                                run_metadata_ptr)
   1473         if run_metadata:
   1474           proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: Incompatible shapes: [32] vs. [32,528]
     [[{{node metrics/acc/Equal}}]]
     [[loss_3/dense_4_loss/weighted_loss/broadcast_weights/assert_broadcastable/is_valid_shape/has_valid_nonscalar_shape/has_invalid_dims/concat/_7061]]
  (1) Invalid argument: Incompatible shapes: [32] vs. [32,528]
     [[{{node metrics/acc/Equal}}]]
0 successful operations.
0 derived errors ignored.
0

There are 0 best solutions below