BiLSTM hidden layers, and memory cells

238 views

I have a BiLSTM model, as the following:

# Stacked BiLSTM model: two bidirectional LSTM blocks with dense layers
# in between, then a dropout and a linear output head.
# A = LSTM units, B = dense units, x = input shape, output = output units
# (all supplied by the surrounding question).
tf.keras.models.Sequential([
    # First BiLSTM keeps the full sequence so the next recurrent layer
    # still sees every time step.
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(A, return_sequences=True),
        input_shape=x,
    ),
    tf.keras.layers.Dense(B, activation='tanh'),
    # Second BiLSTM collapses the sequence to its final state.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(A)),
    tf.keras.layers.Dense(B, activation='tanh'),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Dense(output),
])

If the total parameters = 1 million, what values should A and B be? How many hidden layers should I add to let the model train in a proper way?

I tried the following:

A = 265

B = 64

I used three dense layers, but the forecasting is still weak!

1

There is 1 solution below.

9
On

The LSTM layer is long short-term memory (LSTM). It can process input as sequences, so you do not need to chop the input into small pieces.

Sample: a simple custom LSTM layer. You can apply a Bidirectional wrapper or other properties as well. This example uses a single pass because of its input dimensions.

import tensorflow as tf

class MyLSTMLayer( tf.keras.layers.LSTM ):
    """Thin `tf.keras.layers.LSTM` subclass used for the shape demos below.

    Fix: in the original paste the method bodies were at column 0, outside
    the class body, which raises IndentationError; they are re-indented
    here. Logic is otherwise unchanged.

    NOTE(review): the `return_sequences` / `return_state` constructor
    arguments are accepted but ignored — the super() call hard-codes
    `return_sequences=True, return_state=False` (preserved as-is).
    """

    def __init__(self, units, return_sequences, return_state):
        # Flags deliberately hard-coded, matching the original sample.
        super(MyLSTMLayer, self).__init__( units, return_sequences=True, return_state=False )
        self.num_units = units

    def build(self, input_shape):
        # NOTE(review): this kernel is created but never read by call();
        # it only adds weights. Kept to match the original sample.
        self.kernel = self.add_weight("kernel",
        shape=[int(input_shape[-1]),
        self.num_units])

    def call(self, inputs):
        # NOTE(review): a fresh LSTM is constructed on every call, so its
        # weights are re-initialized each invocation and are not trained.
        lstm = tf.keras.layers.LSTM(self.num_units)
        return lstm(inputs)


# Toy input: multiples of 3 in [3, 93), i.e. 30 values, reshaped to
# (batch=30, time=1, features=1).
start, limit, delta = 3, 93, 3
sample = tf.cast(tf.range(start, limit, delta), dtype=tf.float32)
sample = tf.constant(sample, shape=(30, 1, 1))

# Two custom layers with 10 and 20 units respectively.
layer = MyLSTMLayer(10, True, True)
layer_2 = MyLSTMLayer(20, True, False)

# Run the sample through the first layer, re-add a feature axis,
# then through the second layer, printing each intermediate result.
temp = layer(sample)
print(temp)
temp = tf.expand_dims(temp, -1)
temp = layer_2(temp)
print(temp)

Operation: ( 10, 1, 1 ) x ( 10, 1, 1 )

layer = MyLSTMLayer(10, True, True)
sample = tf.constant( sample, shape=( 10, 1, 1 ) )

Output: (10, 10)

...
  1, 1, 1, 1]], shape=(10, 10), dtype=float32)

Operation: ( 20, 1, 1 ) x ( 10, 1, 1 )

layer = MyLSTMLayer(20, True, True)
sample = tf.constant( sample, shape=( 10, 1, 1 ) )

Output: (20, 10)

...
 1, 1, 1, 1, 1, 1]], shape=(20, 10), dtype=float32)

Operation: ( 30, 1, 1 ) x ( 10, 1, 1 )

layer = MyLSTMLayer(30, True, True)
sample = tf.constant( sample, shape=( 10, 1, 1 ) )

Output: (30, 10)

...
 1, 1, 1, 1, 1, 1]], shape=(30, 10), dtype=float32)

Operation: ( 30, 1, 1 ) x ( 10, 1, 1 )

layer = MyLSTMLayer(10, True, True)
layer_2 = MyLSTMLayer(20, True, False)
sample = tf.constant( sample, shape=( 30, 1, 1 ) )

Output: (30, 20)

...
 1, 1, 1, 1]]], shape=(30, 20), dtype=float32)

Sample: Implementation, Discrete sequence

import tensorflow as tf

class MyLSTMLayer( tf.keras.layers.LSTM ):
    """LSTM subclass that maps its input to a discrete (argmax) sequence.

    NOTE(review): the `return_sequences` / `return_state` constructor
    arguments are accepted but ignored — super() hard-codes them.
    """
    def __init__(self, units, return_sequences, return_state):
        # Flags hard-coded: always return_sequences=True, return_state=False.
        super(MyLSTMLayer, self).__init__( units, return_sequences=True, return_state=False )
        self.num_units = units

    def build(self, input_shape):
        # NOTE(review): this kernel is created but never read by call();
        # it only adds weights that do not affect the output.
        self.kernel = self.add_weight("kernel",
        shape=[int(input_shape[-1]),
        self.num_units])

    def call(self, inputs):
        # NOTE(review): a new LSTM is constructed on every call, so its
        # weights are re-initialized each invocation — nothing is learned.
        lstm = tf.keras.layers.LSTM(self.num_units)
        temp = lstm(inputs)
        temp = tf.nn.softmax(temp)
        # NOTE(review): .numpy() presumably requires eager execution, so
        # this layer likely cannot run inside tf.function/compiled graphs
        # — confirm before reuse. Returns a NumPy array, not a tensor.
        temp = tf.math.argmax(temp).numpy()
        return temp

# A 10-step input sequence with a single 2.0 spike among 1.0s,
# shaped (batch=10, time=1, features=1).
sample = tf.constant(
    [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
    shape=( 10, 1, 1 ),
)
layer = MyLSTMLayer(10, True, False)
# Print the discrete sequence the layer produces for this input.
temp = layer(sample)
print(temp)

Output: As a sequence

[1 0 1 1 1 0 0 0 1 0]