I was training a TensorFlow/Keras model and passing `ignore_class=0` to the loss so that class 0 is ignored when the loss is computed:
# Sparse categorical cross-entropy on integer labels: predictions are treated
# as logits, and label 0 is excluded from the loss through ignore_class.
segmentation_loss = keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    ignore_class=0,
)
adam = keras.optimizers.Adam(learning_rate=0.001)
unet.compile(loss=segmentation_loss, optimizer=adam, metrics=["accuracy"])
This stopped working after I updated all my packages, including the Python version, TensorFlow and Keras. Calling `model.fit` now raises the following error:
model.fit(
File "/user/anaconda3/envs/environment/lib/python3.12/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/user/anaconda3/envs/environment/lib/python3.12/site-packages/keras/src/backend/tensorflow/nn.py", line 623, in sparse_categorical_crossentropy
raise ValueError(
ValueError: Arguments `target` and `output` must have the same shape up until the last dimension: target.shape=(None, 224, 224, 1), output.shape=(None, 224, 224, 224, 6)
Training succeeds when I comment out `ignore_class=0`. Any clue what is causing the extra 224 dimension in `output.shape`? The model itself is created with:
# Build the functional Keras model that maps `inputs` to `out`.
unet = keras.Model(inputs=inputs, outputs=out)
This is what I get when I log the shapes of `inputs` and `out`:
inputs <KerasTensor shape=(None, 224, 224, 3), dtype=float32, sparse=None, name=keras_tensor_429> (None, 224, 224, 3)
out <KerasTensor shape=(None, 224, 224, 6), dtype=float32, sparse=False, name=keras_tensor_459> (None, 224, 224, 6)
Here is my model architecture:
def model(num_classes):
    """Build and compile a U-Net style model on top of a frozen DenseNet121.

    An ImageNet-initialised DenseNet121 (without its classification head) is
    tapped at five intermediate activations. The deepest one is the decoder's
    starting point; the other four are concatenated as skip connections after
    each ``pix2pix.upsample`` block. A final stride-2 transposed convolution
    produces ``num_classes`` output channels.

    Args:
        num_classes: Number of filters of the final ``Conv2DTranspose`` layer,
            i.e. the size of the last axis of the model output.

    Returns:
        The compiled ``keras.Model``. The model is compiled with sparse
        categorical cross-entropy on logits, Adam (learning rate 0.001) and
        the ``"accuracy"`` metric.

    Side effects:
        Prints the summary of the DenseNet121 backbone and of the final model.
    """
    image_shape = (224, 224, 3)

    backbone = keras.applications.DenseNet121(
        weights="imagenet",
        include_top=False,
        input_shape=image_shape,
    )
    backbone.summary()

    # Activations to expose from the backbone, shallowest first.
    # Spatial sizes below are for a 224x224 input.
    tap_layers = (
        "conv1_relu",  # 112x112
        "pool2_relu",  # 56x56
        "pool3_relu",  # 28x28
        "pool4_relu",  # 14x14
        "relu",  # 7x7 (deepest feature map)
    )
    encoder_outputs = [
        backbone.get_layer(layer_name).output for layer_name in tap_layers
    ]

    # The encoder is the pre-trained backbone used as-is: its weights are
    # frozen, so no fine-tuning of the backbone takes place.
    encoder = keras.Model(inputs=backbone.input, outputs=encoder_outputs)
    encoder.trainable = False

    # One decoder block per skip connection.
    # presumably each pix2pix.upsample block doubles the spatial size -- TODO confirm
    decoder_blocks = [
        pix2pix.upsample(filters, 3) for filters in (512, 256, 128, 64)
    ]

    inputs = keras.layers.Input(shape=image_shape)

    # The last encoder output seeds the decoder; the remaining ones are the
    # skip connections, consumed from deepest to shallowest.
    *skip_features, x = encoder(inputs)
    for upsample_block, skip in zip(decoder_blocks, reversed(skip_features)):
        upsampled = upsample_block(x)
        x = keras.layers.Concatenate()([upsampled, skip])

    # Final stride-2 transposed convolution: one output channel per class.
    logits = keras.layers.Conv2DTranspose(
        filters=num_classes,
        kernel_size=3,
        strides=2,
        padding="same",
    )(x)

    unet = keras.Model(inputs=inputs, outputs=logits)

    # NOTE(review): ignore_class=0 is intentionally not passed to the loss.
    # Enabling it with labels shaped (batch, H, W, 1) raised a shape-mismatch
    # ValueError after a Keras upgrade; presumably the labels need their
    # trailing axis squeezed first -- TODO confirm before re-enabling.
    segmentation_loss = keras.losses.SparseCategoricalCrossentropy(
        from_logits=True,
    )
    unet.compile(
        loss=segmentation_loss,
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    unet.summary()
    return unet