I am working on a speech recognition system and took the code from GitHub. I made some additions to that code, shown here:
# Root folder of the LibriSpeech audio dataset.
DATASET_PATH = "F://MS//MS-4//LibriSpeech"
# Output file for the extracted MFCC features and labels.
# NOTE: the stray '*' characters in the original post (markdown-bold
# residue) were syntax errors and have been removed.
JSON_PATH = "data_10.json"
SAMPLE_RATE = 22050  # samples per second used when loading audio
TRACK_DURATION = 15  # assumed track length in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCCs from every audio file under *dataset_path* and save them as JSON.

    Each track is split into *num_segments* equal segments; an MFCC matrix is
    computed per segment and stored together with an integer label derived
    from the sub-folder the file lives in.

    Args:
        dataset_path: root folder; each immediate sub-folder is one class.
        json_path: path of the JSON file to write.
        n_mfcc: number of MFCC coefficients per frame.
        n_fft: FFT window size in samples.
        hop_length: hop between successive frames in samples.
        num_segments: number of segments each track is divided into.
    """
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": [],
    }

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        # Compare paths by value, not identity: 'is not' can evaluate True
        # even for equal strings, which would wrongly treat the root folder
        # as a class folder.
        if dirpath != dataset_path:
            semantic_label = os.path.split(dirpath)[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing: {}".format(semantic_label))

            for f in filenames:
                file_path = os.path.join(dirpath, f)
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                for s in range(num_segments):
                    start_sample = num_samples_per_segment * s
                    finish_sample = start_sample + num_samples_per_segment

                    # BUG FIX: tracks shorter than TRACK_DURATION produced
                    # empty slices for the trailing segments, which crashes
                    # librosa's STFT ("n_fft=2048 is too small for input
                    # signal of length=0").  Stop once a segment would run
                    # past the end of the signal.
                    if finish_sample > len(signal):
                        break

                    # 'y' must be passed by keyword: the positional form was
                    # removed in librosa >= 0.10.
                    mfcc = librosa.feature.mfcc(
                        y=signal[start_sample:finish_sample],
                        sr=sr,
                        n_fft=n_fft,
                        n_mfcc=n_mfcc,
                        hop_length=hop_length,
                    )
                    mfcc = mfcc.T  # shape: (frames, n_mfcc)

                    # Keep only full-length segments so every stored MFCC
                    # matrix has the same shape.
                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        # i counts os.walk folders starting at the root (0),
                        # so the first class folder gets label 0.
                        data["labels"].append(i - 1)
                        print("{}, segment:{}".format(file_path, s + 1))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
if __name__ == "__main__":
    # Build the feature JSON from the dataset when run as a script.
    save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)
This is the error I get. I want to know how it can be fixed:
Warning (from warnings module):
File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\librosa\core\spectrum.py", line 222
warnings.warn(
UserWarning: n_fft=2048 is too small for input signal of length=0Traceback (most recent call last):
File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\datasetread.py", line 73, in
save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\datasetread.py", line 55, in save_mfcc
mfcc=librosa.feature.mfcc(signal[start_sample:finish_sample],sr=sr,n_fft=n_fft,n_mfcc=n_mfcc,hop_length=hop_length)File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\librosa\feature\spectral.py", line 1852, in mfcc
S = power_to_db(melspectrogram(y=y, sr=sr, **kwargs))File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\librosa\feature\spectral.py", line 1996, in melspectrogram
S, n_fft = _spectrogram(File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\librosa\core\spectrum.py", line 2512, in _spectrogram
stft(File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\librosa\core\spectrum.py", line 228, in stft
y = np.pad(y, int(n_fft // 2), mode=pad_mode)File "<array_function internals>", line 5, in pad
File "C:\Users\Hp\AppData\Local\Programs\Python\Python39\lib\site-packages\numpy\lib\arraypad.py", line 814, in pad
raise ValueError(ValueError: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Please identify all mistakes that cause this error or may cause further errors. Thanks in advance.
In your code you have set TRACK_DURATION = 15 (seconds), but your dataset likely contains some tracks (files) shorter than 15 seconds, so the later segment slices are empty. Try reducing TRACK_DURATION, or skip any segment that would extend past the end of the loaded signal before calling librosa.feature.mfcc.