I am trying to train the model below on the Indian Pines dataset, but I get the error shown after the code.
Model:
def ResNet50(input_shape, classes=16):
    """Build and compile a ResNet50-style image classifier.

    Layout: zero-padding -> 7x7 stride-2 convolution -> batch norm -> ReLU ->
    3x3 max-pooling, then four stages made of one ``convolutional_block``
    followed by 2, 3, 5 and 2 ``identity_block`` calls respectively, then
    average pooling, flatten and a softmax ``Dense`` layer.

    Args:
        input_shape: Shape of a single sample, without the batch axis.
            Batch normalisation is applied on axis 3, so the layout is
            expected to be channels-last ``(height, width, channels)``.
        classes: Number of units in the final softmax layer. This argument
            used to be ignored (17 units were hard-coded); it now controls
            the output width. The one-hot labels passed to ``fit`` must have
            exactly this many columns, so pass ``classes=17`` if the labels
            are encoded with 17 columns.

    Returns:
        The compiled Keras ``Model`` named ``'ResNet50'`` (Adam optimizer,
        categorical cross-entropy loss, categorical accuracy metric).
    """
    X_input = Input(shape=input_shape)

    # Pad 3 pixels on each spatial side ahead of the 7x7 convolution.
    X = ZeroPadding2D((3, 3), data_format='channels_last')(X_input)

    # Stage 1: stem convolution, normalisation, activation and pooling.
    # The shape prints are kept as a debugging aid.
    X = Conv2D(64, (7, 7), strides=(2, 2))(X)
    print('==================', X.shape)
    X = BatchNormalization(axis=3)(X)
    print('BN', X.shape)
    X = Activation('relu')(X)
    print('relu', X.shape)
    X = MaxPooling2D((3, 3), strides=(1, 1))(X)
    print('max', X.shape)

    # Stage 2: no spatial down-sampling (s=1).
    X = convolutional_block(X, f=3, filters=[64, 64, 256], s=1)
    X = identity_block(X, 3, [64, 64, 256])
    X = identity_block(X, 3, [64, 64, 256])

    # Stage 3: the convolutional block down-samples with stride 2.
    X = convolutional_block(X, f=3, filters=[128, 128, 512], s=2)
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])
    X = identity_block(X, 3, [128, 128, 512])

    # Stage 4
    X = convolutional_block(X, f=3, filters=[256, 256, 1024], s=2)
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])
    X = identity_block(X, 3, [256, 256, 1024])

    # Stage 5
    X = convolutional_block(X, f=3, filters=[512, 512, 2048], s=2)
    X = identity_block(X, 3, [512, 512, 2048])
    X = identity_block(X, 3, [512, 512, 2048])

    # Average pooling before the classifier head.
    X = AveragePooling2D(pool_size=(2, 2), padding='same')(X)

    # Output layer: one softmax unit per class (was hard-coded to 17).
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0))(X)
    print("=================",X.shape)

    # Create, compile and summarise the model.
    model = Model(inputs=X_input, outputs=X, name='ResNet50')
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.summary()
    return model
Identity Block
def identity_block(X, f, filters):
    """Residual block whose shortcut is the unmodified input tensor.

    The main path is 1x1 conv -> BN -> ReLU -> fxf conv ('same' padding) ->
    BN -> ReLU -> 1x1 conv -> BN. Its result is added to the input and passed
    through a final ReLU.

    Args:
        X: Input tensor; batch normalisation is applied on axis 3.
        f: Side length of the square kernel used by the middle convolution.
        filters: Three integers giving the filter counts of the three
            convolutions, in order. The result is added to ``X`` directly,
            so the last count presumably has to match the channel count
            of ``X``.

    Returns:
        The output tensor of the block.
    """
    n_first, n_middle, n_last = filters

    # Keep a handle on the input for the skip connection.
    skip = X

    # 1x1 convolution.
    branch = Conv2D(n_first, kernel_size=(1, 1), strides=(1, 1), padding='valid')(X)
    branch = BatchNormalization(axis=3)(branch)
    branch = Activation('relu')(branch)

    # fxf convolution; 'same' padding keeps the spatial size.
    branch = Conv2D(n_middle, kernel_size=(f, f), strides=(1, 1), padding='same')(branch)
    branch = BatchNormalization(axis=3)(branch)
    branch = Activation('relu')(branch)

    # Closing 1x1 convolution; no activation before the merge.
    branch = Conv2D(n_last, kernel_size=(1, 1), strides=(1, 1), padding='valid')(branch)
    branch = BatchNormalization(axis=3)(branch)

    # Merge with the skip connection, then apply the final ReLU.
    merged = Add()([branch, skip])
    return Activation('relu')(merged)
Convolutional Block
def convolutional_block(X, f, filters, s):
    """Residual block whose shortcut is a strided 1x1 convolution.

    The main path is 1x1 conv (stride ``s``) -> BN -> ReLU -> fxf conv
    ('same' padding) -> BN -> ReLU -> 1x1 conv -> BN. The shortcut path is a
    1x1 conv (stride ``s``) -> BN applied to the input. Both paths are added
    and passed through a final ReLU.

    Args:
        X: Input tensor; batch normalisation is applied on axis 3.
        f: Side length of the square kernel used by the middle convolution.
        filters: Three integers giving the filter counts of the three
            main-path convolutions, in order; the shortcut convolution uses
            the last one.
        s: Stride used by the first main-path convolution and by the
            shortcut convolution.

    Returns:
        The output tensor of the block.
    """
    n_first, n_middle, n_last = filters

    # Keep a handle on the input; it is projected further down.
    skip = X

    # Main path: strided 1x1 convolution.
    branch = Conv2D(n_first, kernel_size=(1, 1), strides=(s, s), padding='valid')(X)
    branch = BatchNormalization(axis=3)(branch)
    branch = Activation('relu')(branch)

    # Main path: fxf convolution; 'same' padding keeps the spatial size.
    branch = Conv2D(n_middle, kernel_size=(f, f), strides=(1, 1), padding='same')(branch)
    branch = BatchNormalization(axis=3)(branch)
    branch = Activation('relu')(branch)

    # Main path: closing 1x1 convolution; no activation before the merge.
    branch = Conv2D(n_last, kernel_size=(1, 1), strides=(1, 1), padding='valid')(branch)
    branch = BatchNormalization(axis=3)(branch)

    # Shortcut path: project the input with the same stride and the same
    # number of filters as the end of the main path.
    skip = Conv2D(n_last, kernel_size=(1, 1), strides=(s, s), padding='valid')(skip)
    skip = BatchNormalization(axis=3)(skip)

    # Merge both paths, then apply the final ReLU.
    merged = Add()([branch, skip])
    return Activation('relu')(merged)
Main:
def load_hsi(data_path='dataset/Indian_pines_corrected.mat',
             gt_path='dataset/Indian_pines_gt.mat',
             data_key='indian_pines_corrected',
             gt_key='indian_pines_gt'):
    """Load a hyperspectral cube and its ground-truth map from ``.mat`` files.

    All parameters default to the Indian Pines file names and variable names
    that were previously hard-coded, so ``load_hsi()`` behaves as before.

    Args:
        data_path: Path of the ``.mat`` file holding the data cube.
        gt_path: Path of the ``.mat`` file holding the ground-truth labels.
        data_key: Name of the variable to read from ``data_path``.
        gt_key: Name of the variable to read from ``gt_path``.

    Returns:
        A tuple ``(X, y)`` with the arrays stored under ``data_key`` and
        ``gt_key``.

    Raises:
        KeyError: If a requested variable is missing from its file.
    """
    X = loadmat(data_path)[data_key]
    y = loadmat(gt_path)[gt_key]
    # Report the loaded shapes as a quick sanity check.
    print(f"X shape: {X.shape}\ny shape: {y.shape}")
    print("===========================================")
    return X, y
# Load the hyperspectral cube X and its ground-truth label map y.
X, y = load_hsi()
# NOTE(review): X and y are split as-is, i.e. along the first axis of the
# single image rather than per pixel or per patch. The reported input shape
# (97, 145, 200) suggests this keeps 97 image rows for training - confirm
# that this is the intended split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# One-hot encode the training label map, then swap its first two axes and
# collapse them so that each row is the one-hot vector of one pixel.
y_train = to_categorical(y_train)
y_train = y_train.transpose(1, 0, 2).reshape(-1, y_train.shape[2])
# Same encoding and flattening for the test labels.
y_test = to_categorical(y_test)
y_test = y_test.transpose(1, 0, 2).reshape(-1, y_test.shape[2])
# Add a leading batch axis to X_train / X_test so they become 4-D.
X_train = np.expand_dims(X_train, axis=0)
X_test = np.expand_dims(X_test, axis=0)
# NOTE(review): the same leading axis is added to the labels, which makes them
# 3-D. The reported error shows targets of shape (None, 14065, 17) against a
# model output of (None, 17), so these targets cannot match the model output.
y_train = np.expand_dims(y_train, axis=0)
y_test = np.expand_dims(y_test, axis=0)
# NOTE(review): `model` is not defined in the visible code; presumably it is
# the result of ResNet50(...) - confirm. X_train holds a single sample here,
# while the flattened labels hold one row per pixel.
history = model.fit(X_train, y_train, epochs=100)
I get the following error:
File "C:\Users-\PycharmProjects\pythonProject\venv\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None, 14065, 17) and (None, 17) are incompatible
I think my data has the wrong shape. Could someone tell me what I am doing wrong?
#Model summary
Model: "ResNet50"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 97, 145, 20 0 []
0)]
zero_padding2d (ZeroPadding2D) (None, 103, 151, 20 0 ['input_1[0][0]']
0)
conv2d (Conv2D) (None, 49, 73, 64) 627264 ['zero_padding2d[0][0]']
batch_normalization (BatchNorm (None, 49, 73, 64) 256 ['conv2d[0][0]']
alization)
activation (Activation) (None, 49, 73, 64) 0 ['batch_normalization[0][0]']
max_pooling2d (MaxPooling2D) (None, 47, 71, 64) 0 ['activation[0][0]']
conv2d_1 (Conv2D) (None, 47, 71, 64) 4160 ['max_pooling2d[0][0]']
batch_normalization_1 (BatchNo (None, 47, 71, 64) 256 ['conv2d_1[0][0]']
rmalization)
.....
activation_45 (Activation) (None, 6, 9, 2048) 0 ['add_14[0][0]']
conv2d_50 (Conv2D) (None, 6, 9, 512) 1049088 ['activation_45[0][0]']
batch_normalization_50 (BatchN (None, 6, 9, 512) 2048 ['conv2d_50[0][0]']
ormalization)
activation_46 (Activation) (None, 6, 9, 512) 0 ['batch_normalization_50[0][0]']
conv2d_51 (Conv2D) (None, 6, 9, 512) 2359808 ['activation_46[0][0]']
batch_normalization_51 (BatchN (None, 6, 9, 512) 2048 ['conv2d_51[0][0]']
ormalization)
activation_47 (Activation) (None, 6, 9, 512) 0 ['batch_normalization_51[0][0]']
conv2d_52 (Conv2D) (None, 6, 9, 2048) 1050624 ['activation_47[0][0]']
batch_normalization_52 (BatchN (None, 6, 9, 2048) 8192 ['conv2d_52[0][0]']
ormalization)
add_15 (Add) (None, 6, 9, 2048) 0 ['batch_normalization_52[0][0]',
'activation_45[0][0]']
activation_48 (Activation) (None, 6, 9, 2048) 0 ['add_15[0][0]']
average_pooling2d (AveragePool (None, 3, 5, 2048) 0 ['activation_48[0][0]']
ing2D)
flatten (Flatten) (None, 30720) 0 ['average_pooling2d[0][0]']
dense (Dense) (None, 17) 522257 ['flatten[0][0]']
==================================================================================================
Total params: 24,727,761
Trainable params: 24,674,641
Non-trainable params: 53,120
__________________________________________________________________________________________________
So the main issue is that the shape of your labels is `(1, 14065, 17)`, whereas your model outputs `(None, 17)`. Also, if you have 16 classes you need 16 units on the last dense layer, not 17.
For the labels, there is no need to add a dimension for the channel. They are labels, meaning either indices or one-hot encodings, therefore the correct shape is `(14065, 17)` and not `(1, 14065, 17)`.
Another issue is how your images are shaped. For your images, the dimensions need to be `(n_images, dim1, dim2, n_channels)`, where `n_images` is the number of images, `dim1` and `dim2` are the dimensions of each image, and `n_channels` is the number of channels of each image. Since the model outputs one prediction of shape `(17,)` per image, the number of label rows must also equal `n_images`.
EDIT:
"Yes that is the size of my X (145,145,200) that is what X.shape shows me after i load the data"
Then you simply do `np.expand_dims(X, axis=0)` or `X.reshape((1, 145, 145, 200))` to have `(1, 145, 145, 200)`.