Grad-CAM: How do I extract the output of the intermediate layers for this shared-weight model?

162 Views Asked by At

This is the Siamese network model I built using the Keras Siamese network implementation.

# Backbone: ResNet50 loaded with ImageNet weights and without its top
# (classification) layers, taking `target_shape` RGB images.
base_cnn = resnet.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=target_shape + (3,),
)


def _dense_bn_stage(inputs, units=512):
    """Apply a ReLU Dense layer of `units` units, then BatchNormalization."""
    hidden = layers.Dense(units, activation="relu")(inputs)
    return layers.BatchNormalization()(hidden)


# Embedding head: flatten the backbone output, run it through three
# Dense(512) + BatchNormalization stages, then project to 256 dimensions
# with a final Dense layer that has no activation.
# (A leading Dense(1024) stage was commented out in the original listing and
# is not part of the model.)
flatten = layers.Flatten()(base_cnn.output)
dense1 = _dense_bn_stage(flatten)
dense2 = _dense_bn_stage(dense1)
dense3 = _dense_bn_stage(dense2)
output = layers.Dense(256)(dense3)

embedding = Model(inputs=base_cnn.input, outputs=output, name="Embedding")

# Mark every backbone layer as non-trainable; the head layers created above
# are not touched by this loop.
trainable = False
for backbone_layer in base_cnn.layers:
    backbone_layer.trainable = trainable

class DistanceLayer(layers.Layer):
    """Score the anchor embedding against the positive and negative ones.

    The layer creates no weights of its own; it only combines the three
    embeddings it is called with.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, positive, negative):
        """Return ``(anchor-vs-positive, anchor-vs-negative)`` scores.

        Both scores come from ``tf.keras.losses.cosine_similarity``.
        NOTE(review): Keras documents that function as returning the
        *negated* cosine similarity (lower means more similar), which is
        what lets the result be used like a distance -- confirm for the
        installed version.
        """
        # A squared-Euclidean variant,
        # tf.reduce_sum(tf.square(anchor - other), axis=-1), was commented
        # out in the original listing in favour of the cosine-based score.
        score_fn = tf.keras.losses.cosine_similarity
        positive_score = score_fn(anchor, positive)
        negative_score = score_fn(anchor, negative)
        return (positive_score, negative_score)
    
# All three inputs are RGB images of size `target_shape`.
image_shape = target_shape + (3,)
anchor_input = layers.Input(shape=image_shape, name="anchor")
positive_input = layers.Input(shape=image_shape, name="positive")
negative_input = layers.Input(shape=image_shape, name="negative")

# The same `embedding` model instance encodes every branch, so the three
# branches share one set of weights. Each image goes through the ResNet
# preprocessing function before being embedded.
anchor_embedding = embedding(resnet.preprocess_input(anchor_input))
positive_embedding = embedding(resnet.preprocess_input(positive_input))
negative_embedding = embedding(resnet.preprocess_input(negative_input))

# `distances` is the (anchor-positive, anchor-negative) pair produced by
# DistanceLayer.
distances = DistanceLayer()(
    anchor_embedding,
    positive_embedding,
    negative_embedding,
)

siamese_network = Model(
    inputs=[anchor_input, positive_input, negative_input],
    outputs=distances,
)
class SiameseModel(Model):
    """Siamese network wrapper with custom training and evaluation steps.

    The wrapped ``siamese_network`` maps an (anchor, positive, negative)
    triplet to the pair ``(ap_distance, an_distance)``. This class turns
    that pair into the triplet loss

        L(A, P, N) = max(d(A, P) - d(A, N) + margin, 0)

    where ``d`` is whatever distance the wrapped network outputs (the class
    itself does not assume a particular metric).

    Args:
        siamese_network: model returning ``(ap_distance, an_distance)``.
        margin: value added to ``ap_distance - an_distance`` before the
            result is clipped at zero. Defaults to 0.2.
    """

    def __init__(self, siamese_network, margin= 0.2):
        super().__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        # Running mean of the loss, reported by train_step/test_step.
        self.loss_tracker = metrics.Mean(name="loss")

    def call(self, inputs):
        """Forward `inputs` to the wrapped network and return its output."""
        return self.siamese_network(inputs)

    def train_step(self, data):
        """Run one optimisation step on `data` and return the mean loss."""
        # Record the forward pass so the loss can be differentiated with
        # respect to the wrapped network's trainable weights.
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        trainable_weights = self.siamese_network.trainable_weights
        gradients = tape.gradient(loss, trainable_weights)

        # Apply the gradients with the optimizer given to `compile()`.
        self.optimizer.apply_gradients(zip(gradients, trainable_weights))

        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        """Evaluate the loss on `data` (no weight update) and return its mean."""
        loss = self._compute_loss(data)

        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        """Return the triplet loss for `data`, clipped below at zero."""
        # Call the model itself rather than its `__call__` dunder directly;
        # the network yields the anchor-positive and anchor-negative
        # distances as a 2-tuple.
        ap_distance, an_distance = self(data)

        # Equivalent to tf.maximum(ap - an + margin, 0.0): only a lower
        # bound is applied (the upper bound is None).
        loss = ap_distance - an_distance
        loss = tf.keras.backend.clip(loss + self.margin, 0., None)
        return loss

    def get_config(self):
        """Return the base config extended with this model's constructor args.

        NOTE(review): ``siamese_network`` is stored as the live model object,
        not as a serialised config, so the returned dict is presumably not
        JSON-serialisable -- confirm before relying on it for saving.
        """
        return {
            **super().get_config(),
            "siamese_network": self.siamese_network,
            "margin": self.margin,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a model from a dict produced by `get_config`.

        Only the ``"siamese_network"`` and ``"margin"`` entries are used.
        The entries are read without being removed, so the caller's `config`
        dict is left unmodified.

        Raises:
            KeyError: if either entry is missing from `config`.
        """
        return cls(
            siamese_network=config["siamese_network"],
            margin=config["margin"],
        )

    @property
    def metrics(self):
        # Listing the tracker here lets Keras reset it automatically
        # (via `reset_states()`).
        return [self.loss_tracker]

This is the model architecture: Model Architecture

Now I want to apply the Grad-CAM algorithm to it to visualize what the model is focusing on. I used the Keras implementation here: https://keras.io/examples/vision/grad_cam/. One of the requirements of Grad-CAM is a model that returns both the activations of the last convolutional layer and the model's output. I used the following code:

# Try to build a model that returns both the output of the last ResNet50
# convolutional block and the siamese network's own output, as Grad-CAM
# requires.
# NOTE(review): "conv5_block3_out" lives inside the nested "Embedding" model,
# so its output tensor presumably traces back to that sub-model's own input
# rather than to siamese_network.input. That would be consistent with the
# "Graph disconnected" error reported for this call.
embedding_model = siamese_network.get_layer("Embedding")
last_conv_output = embedding_model.get_layer("conv5_block3_out").output
grad_model = tf.keras.models.Model(
    inputs=[siamese_network.input],
    outputs=[last_conv_output, siamese_network.output],
)

Why am I getting the following error?

ValueError: Graph disconnected: cannot obtain value for tensor
KerasTensor(type_spec=TensorSpec(shape=(None, 200, 200, 3), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") 
at layer "conv1_pad". The following previous layers were accessed without issue: []

Is there a way to build a model from the siamese_network I have built that outputs all three embeddings as well as the original model output (the anchor-positive and anchor-negative distances)?

0

There are 0 best solutions below