I am trying to run a DeepLabV3+ model for image segmentation. My images have shape (224, 224, 8), since each satellite image has 8 bands, and the labels have shape (224, 224, 1). Below are the shapes of my train, validation and test sets.
print(train_dataset)
print(val_dataset)
print(test_dataset)
<BatchDataset element_spec=(TensorSpec(shape=(5, 224, 224, 8), dtype=tf.float32, name=None), TensorSpec(shape=(5, 224, 224, 1), dtype=tf.float32, name=None))>
<BatchDataset element_spec=(TensorSpec(shape=(5, 224, 224, 8), dtype=tf.float32, name=None), TensorSpec(shape=(5, 224, 224, 1), dtype=tf.float32, name=None))>
<BatchDataset element_spec=(TensorSpec(shape=(5, 224, 224, 8), dtype=tf.float32, name=None), TensorSpec(shape=(5, 224, 224, 1), dtype=tf.float32, name=None))>
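For context, here is a minimal stand-in for my loading pipeline (not my actual code; synthetic arrays replace the real band stacks, but it reproduces the same element_spec with a batch size of 5):
import numpy as np
import tensorflow as tf

# Synthetic stand-ins for the real 8-band image stacks and single-channel masks
images = np.random.rand(50, 224, 224, 8).astype("float32")
labels = np.random.randint(0, 2, size=(50, 224, 224, 1)).astype("float32")

train_dataset = tf.data.Dataset.from_tensor_slices((images, labels)).batch(
    5, drop_remainder=True
)
print(train_dataset.element_spec)
# (TensorSpec(shape=(5, 224, 224, 8), dtype=tf.float32, name=None),
#  TensorSpec(shape=(5, 224, 224, 1), dtype=tf.float32, name=None))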
ResNet50 expects an input shape of (224, 224, 3), so I need to modify the initial layers for it to work with my 8-band input. I added a Conv2D layer and max pooling in front of ResNet50 as its input (the code is included below under ResNet50). I've been trying to find a way to run this while still benefiting from the ImageNet weights.
However, this raises the following error:
ValueError Traceback (most recent call last)
Cell In[198], line 18
15 x = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(x)
17 # Load the pre-trained ResNet-50 model (without top classification layers)
---> 18 resnet = ResNet50(weights='imagenet', include_top=False, input_tensor=x)
20 # Freeze the layers of the pre-trained model
21 for layer in resnet.layers:
File /opt/conda/lib/python3.10/site-packages/keras/applications/resnet.py:521, in ResNet50(include_top, weights, input_tensor, input_shape, pooling, classes, **kwargs)
518 x = stack1(x, 256, 6, name="conv4")
519 return stack1(x, 512, 3, name="conv5")
--> 521 return ResNet(
522 stack_fn,
523 False,
524 True,
525 "resnet50",
526 include_top,
527 weights,
528 input_tensor,
529 input_shape,
530 pooling,
531 classes,
532 **kwargs,
533 )
File /opt/conda/lib/python3.10/site-packages/keras/applications/resnet.py:238, in ResNet(stack_fn, preact, use_bias, model_name, include_top, weights, input_tensor, input_shape, pooling, classes, classifier_activation, **kwargs)
231 file_hash = WEIGHTS_HASHES[model_name][1]
232 weights_path = data_utils.get_file(
233 file_name,
234 BASE_WEIGHTS_PATH + file_name,
235 cache_subdir="models",
236 file_hash=file_hash,
237 )
--> 238 model.load_weights(weights_path)
239 elif weights is not None:
240 model.load_weights(weights)
File /opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /opt/conda/lib/python3.10/site-packages/keras/saving/legacy/hdf5_format.py:812, in load_weights_from_hdf5_group(f, model)
810 layer_names = filtered_layer_names
811 if len(layer_names) != len(filtered_layers):
--> 812 raise ValueError(
813 "Layer count mismatch when loading weights from file. "
814 f"Model expected {len(filtered_layers)} layers, found "
815 f"{len(layer_names)} saved layers."
816 )
818 # We batch weight value assignments in a single backend call
819 # which provides a speedup in TensorFlow.
820 weight_value_tuples = []
ValueError: Layer count mismatch when loading weights from file. Model expected 107 layers, found 106 saved layers.
If I take the pre-trained weights out, i.e. set weights=None, I get the following error instead, which doesn't make sense to me because the model summary shows the correct shapes for all the layers.
Error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[200], line 8
1 loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
2 model.compile(
3 optimizer=keras.optimizers.Adam(learning_rate=0.001),
4 loss=loss,
5 metrics=["accuracy"],
6 )
----> 8 history = model.fit(train_dataset, validation_data=val_dataset, epochs=25)
10 plt.plot(history.history["loss"])
11 plt.title("Training Loss")
File /opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /var/tmp/__autograph_generated_files83s1t5c.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function *
return step_function(self, iterator)
File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step **
outputs = model.train_step(data)
File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1023, in train_step
y_pred = self(x, training=True)
File "/opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/opt/conda/lib/python3.10/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
raise ValueError(
ValueError: Input 0 of layer "model_17" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, None, None, 5, 224, 224, 8)
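For reference, here is a quick way to compare what the model expects with what the dataset actually yields (just standard Keras / tf.data calls, shown as a sketch):
print(model.input_shape)                      # what the model expects
for images, labels in train_dataset.take(1):  # pull one batch from the pipeline
    print(images.shape, labels.shape)         # what the model will actually receive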
ResNet50
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# Define the input shape for the 8-band imagery
input_shape = (224, 224, 8)  # (height, width, channels)
# Create an input layer for the 8-band input
input_layer = Input(shape=input_shape)
# Custom convolutional stem that mirrors the initial layers of ResNet-50
x = Conv2D(64, (3, 3), strides=(2, 2), padding='same', activation='relu', kernel_initializer='glorot_uniform')(input_layer)
x = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(x)
# Load the pre-trained ResNet-50 model (without top classification layers)
resnet = ResNet50(weights='imagenet', include_top=False, input_tensor=x)
# Freeze the layers of the pre-trained model
for layer in resnet.layers:
layer.trainable = False
# Create a new output layer for your classification task
x = GlobalAveragePooling2D()(resnet.output)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(num_classes, activation='softmax')(x)
# Build the model using the modified architecture
model = Model(inputs=input_layer, outputs=x)
# Compile the model with categorical cross-entropy loss
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# Check the model summary
model.summary()
DeepLabV3+ modified (based on https://keras.io/examples/vision/deeplabv3_plus/)
I also tried a combined model, with the aim of chaining the two models directly inside DeepLabV3+. However, it produced the same error.
Code
def DeeplabV3Plus(image_size, num_classes):
    input_layer = Input(shape=(224, 224, 8))

    # Create the custom neural network model for the initial transformation
    custom_model = keras.Sequential([
        keras.Input(shape=(224, 224, 8)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='glorot_uniform'),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='glorot_uniform'),
        layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same', kernel_initializer='glorot_uniform')
    ])

    model_input = custom_model(keras.Input(shape=(224, 224, 8)))  # keras.Input(shape=(image_size, image_size, 3))
    resnet50 = keras.applications.ResNet50(
        weights="imagenet", include_top=False, input_tensor=model_input
    )
    x = resnet50.get_layer("conv4_block6_2_relu").output
    x = DilatedSpatialPyramidPooling(x)

    input_a = layers.UpSampling2D(
        size=(image_size // 4 // x.shape[1], image_size // 4 // x.shape[2]),
        interpolation="bilinear",
    )(x)
    input_b = resnet50.get_layer("conv2_block3_2_relu").output
    input_b = convolution_block(input_b, num_filters=48, kernel_size=1)

    x = layers.Concatenate(axis=-1)([input_a, input_b])
    x = convolution_block(x)
    x = convolution_block(x)
    x = layers.UpSampling2D(
        size=(image_size // x.shape[1], image_size // x.shape[2]),
        interpolation="bilinear",
    )(x)
    model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(x)
    return keras.Model(inputs=model_input, outputs=model_output)
model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=0.001),
loss=loss,
metrics=["accuracy"],
)
history = model.fit(train_dataset, validation_data=val_dataset, epochs=25)
Error
ValueError: Layer count mismatch when loading weights from file. Model expected 107 layers, found 106 saved layers.
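One variant I'm considering next (a sketch only, not verified): project the 8 bands down to 3 channels with a 1x1 convolution and apply a separately built ResNet50 to the result, instead of passing input_tensor, so the ImageNet weights load against the stock (224, 224, 3) graph:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Build the stock ResNet50 so the ImageNet weights load against the expected graph
backbone = keras.applications.ResNet50(
    weights="imagenet", include_top=False, input_shape=(224, 224, 3)
)
backbone.trainable = False  # freeze the pre-trained layers

# Expose the two intermediate feature maps the DeepLabV3+ decoder uses
feature_extractor = keras.Model(
    inputs=backbone.input,
    outputs=[
        backbone.get_layer("conv4_block6_2_relu").output,
        backbone.get_layer("conv2_block3_2_relu").output,
    ],
)

inputs = keras.Input(shape=(224, 224, 8))
x = layers.Conv2D(3, (1, 1), padding="same", name="band_projection")(inputs)  # 8 bands -> 3 channels
deep_features, low_level_features = feature_extractor(x)
# ...the rest of the DeepLabV3+ decoder (DilatedSpatialPyramidPooling, upsampling,
# concatenation, final 1x1 conv) would attach to these two tensors as in the code above.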
The expectation is that I am able to run the DeepLabV3+ model for my given image size with ResNet50 as the backbone. I think I am making a very simple mistake here, but I can't figure out what it is. Any help is appreciated.