Output of model.summary() is not as expected in TensorFlow 2


I've defined a complex deep learning model, but for the purpose of this question, I'll use a simple one.

Consider the following:

import tensorflow as tf
from tensorflow.keras import layers, models


def simpleMLP(in_size, hidden_sizes, num_classes, dropout_prob=0.5):
    in_x = layers.Input(shape=(in_size,))
    # Hidden stack built as a nested Sequential model
    hidden_x = models.Sequential(name="hidden_layers")
    for i, num_h in enumerate(hidden_sizes):
        # input_shape is only honoured on the first layer; the rest infer it
        hidden_x.add(layers.Dense(num_h, input_shape=(in_size,) if i == 0 else []))
        hidden_x.add(layers.Activation('relu'))
        hidden_x.add(layers.Dropout(dropout_prob))
    out_x = layers.Dense(num_classes, activation='softmax', name='baseline')
    # Wrap everything in a functional Model: Input -> Sequential -> Dense
    return models.Model(inputs=in_x, outputs=out_x(hidden_x(in_x)))

I will call the function in the following manner:

mdl = simpleMLP(28*28, [500, 300], 10)

Now when I do mdl.summary() I get the following:

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
hidden_layers (Sequential)   (None, 300)               542800    
_________________________________________________________________
baseline (Dense)             (None, 10)                3010      
=================================================================
Total params: 545,810
Trainable params: 545,810
Non-trainable params: 0
_________________________________________________________________
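(For reference, the totals are arithmetically consistent: 542,800 = (784*500 + 500) + (500*300 + 300) for the two hidden Dense layers, and 3,010 = 300*10 + 10 for the output layer. It is only the per-layer breakdown that is missing.)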

The problem is that the Sequential block is collapsed into a single row: the summary reports only its final output shape and the sum total of its parameters, not the individual layers inside it.

In my complex model I have multiple Sequential blocks, all of whose internals are hidden in this way.

Is there a way to make the summary more verbose? Am I doing something wrong in the model definition?

Edit

When using PyTorch I don't see the same behaviour. Consider the following example (taken from here):

import torch
import torch.nn as nn


class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)

        x = x.view(x.size(0), -1)  # flat

        x = self.decoder(x)

        return x

When printing an instance of it, for example with in_c=1 and n_classes=10 to match the shapes below:
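model = MyCNNClassifier(in_c=1, n_classes=10)
print(model)

I get: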

MyCNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)
Answer by AudioBubble:

There is nothing wrong with the model summary in TensorFlow 2.x; a nested model is reported as a single row by design.

Running the simpleMLP definition and the mdl.summary() call from the question reproduces exactly the output shown above, with the nested model collapsed into the single hidden_layers (Sequential) row.

You can use get_layer to retrieve a layer by either its name or its index.

If name and index are both provided, index will take precedence. Indices are based on order of horizontal graph traversal (bottom-up).

Here, to get the details of the nested Sequential model (indexed at 1 in mdl), you can try:

mdl.get_layer(index=1).summary()
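(Equivalently, since the nested model was given an explicit name in the question's code, mdl.get_layer(name="hidden_layers").summary() retrieves the same sub-model by name.)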

Output:

Model: "hidden_layers"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_2 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_2 (Activation)    (None, 500)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 300)               150300    
_________________________________________________________________
activation_3 (Activation)    (None, 300)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 300)               0         
=================================================================
Total params: 542,800
Trainable params: 542,800
Non-trainable params: 0
_________________________________________________________________
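As a side note, more recent TensorFlow/Keras releases also support an expand_nested argument on summary() (this depends on your version; it is not available in every 2.x release), which lists the nested layers inline under the parent row instead of requiring a separate call:

mdl.summary(expand_nested=True)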