i'm working on a speech recognition usign lstm model, i'm using the mini speech recognition command dataset, the tutorial given on tensorflow simple audio, is for classification and they used CNN, so i tried to change it to a speech recognition using LSTM,

i tried to change the shape of my data or to change my logic, but every time i tried so i get a different error, down is my code

import os
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
DATASET_PATH = 'data/mini_speech_commands'

data_dir = pathlib.Path(DATASET_PATH)
if not data_dir.exists():
  tf.keras.utils.get_file(
      'mini_speech_commands.zip',
      origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
      extract=True,
      cache_dir='.', cache_subdir='data')
commands = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = commands[(commands != 'README.md') & (commands != '.DS_Store')]
print('Commands:', commands)
def get_mfcc(audio):
    stft = tf.signal.stft(audio, frame_length=400, frame_step=160, fft_length=512, pad_end=True)
    spectrogram = tf.abs(stft) ** 2
    num_mel_bins = 13 
    mel_filterbanks = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, 257, 16000, lower_edge_hertz=0.0, upper_edge_hertz=8000.0)
    mel_spectrogram = tf.matmul(spectrogram, mel_filterbanks)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)  # Small constant added to avoid log(0)
    log_mel_spectrogram = tf.image.resize_with_crop_or_pad(log_mel_spectrogram, max_sequence_length, num_mel_bins)
    return log_mel_spectrogram
batch_size = 64
max_sequence_length = 100 
train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(
    directory=data_dir,
    batch_size=batch_size,
    validation_split=0.2,
    seed=0,
    output_sequence_length=max_sequence_length,  
    subset='both')
# Map the get_mfcc function for dataset preprocessing
def preprocess(audio, label):
    mfccs = get_mfcc(audio)
    return mfccs, label

train_ds = train_ds.map(preprocess, tf.data.AUTOTUNE)
val_ds = val_ds.map(preprocess, tf.data.AUTOTUNE)
num_labels = len(commands)
model = models.Sequential([
    layers.Input(shape=(max_sequence_length, num_mel_bins)),  
    layers.Masking(mask_value=0.0),  # Mask padding
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(128),
    layers.Dense(num_labels, activation='softmax')
])
model.summary()
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)
# Training
EPOCHS = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2)
)

when running the code i get

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_6' (type Sequential).
    
    Input 0 of layer "lstm_12" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 100, 13, 13)
    
    Call arguments received by layer 'sequential_6' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 100, 13, 13), dtype=float32)
      • training=True
      • mask=None

i don't know why i get this error, is there someone who can help me.

0

There are 0 best solutions below