I'm learning TinyML, so I tried to convert a machine learning model (https://github.com/mariostrbac/environmental-sound-classification) and add the TFLite part from this course (https://colab.research.google.com/github/tinyMLx/colabs/blob/master/3-3-10-TFLiteOptimizations.ipynb).
But after adapting the code, my TinyML model performs badly (about 15% accuracy) and I don't know why.
I'm using Google Colab.
Libraries
!pip install np_utils
import numpy as np
import pandas as pd
import tensorflow as tf
import pathlib
from tensorflow.keras import regularizers, activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from matplotlib import pyplot as plt
%matplotlib inline
Import data (128x128 Log-Mel Spectrograms)
USE_GOOGLE_COLAB = True
if USE_GOOGLE_COLAB:
    from google.colab import drive
    drive.mount('/content/gdrive')
    # change the current working directory
    %cd /content/gdrive/MyDrive/
else:
    %cd US8K
us8k_df = pd.read_pickle("us8k_df.pkl")
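For reference, a quick sanity check on the loaded DataFrame (a sketch; the column names are the ones used further down):

# sketch: quick look at the pickled DataFrame
print(us8k_df.shape)
print(us8k_df.columns)                          # expecting: melspectrogram, label, fold
print(us8k_df['fold'].value_counts())           # samples per fold
print(us8k_df['melspectrogram'].iloc[0].shape)  # should be (128, 128)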
Functions
# data augmentation
def init_data_aug():
    train_datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        fill_mode='constant',
        cval=-80.0,
        width_shift_range=0.1,
        height_shift_range=0.0)

    val_datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        fill_mode='constant',
        cval=-80.0)

    return train_datagen, val_datagen
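As far as I understand, featurewise_center / featurewise_std_normalization make the generators subtract the dataset mean and divide by the dataset std computed in fit(), roughly like this (sketch, assuming X_train is already split as below):

# sketch: after fit(), the generator stores the statistics it applies to every batch from .flow()
train_datagen, val_datagen = init_data_aug()
train_datagen.fit(X_train)                    # computes mean/std over X_train
print(train_datagen.mean, train_datagen.std)  # the values used to standardize each batch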
# the model
def init_model():
    model1 = Sequential()

    # layer-1
    model1.add(Conv2D(filters=24, kernel_size=5, input_shape=(128, 128, 1),
                      kernel_regularizer=regularizers.l2(1e-3)))
    model1.add(MaxPooling2D(pool_size=(3, 3), strides=3))
    model1.add(Activation(activations.relu))

    # layer-2
    model1.add(Conv2D(filters=36, kernel_size=4, padding='valid',
                      kernel_regularizer=regularizers.l2(1e-3)))
    model1.add(MaxPooling2D(pool_size=(2, 2), strides=2))
    model1.add(Activation(activations.relu))

    # layer-3
    model1.add(Conv2D(filters=48, kernel_size=3, padding='valid'))
    model1.add(Activation(activations.relu))
    model1.add(GlobalAveragePooling2D())

    # layer-4 (1st dense layer)
    model1.add(Dense(60, activation='relu'))
    model1.add(Dropout(0.5))

    # layer-5 (2nd dense layer)
    model1.add(Dense(10, activation='softmax'))

    # compile
    model1.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    return model1
#init model
model = init_model()
model.summary()
# split train/validation data
def train_test_split(fold_k, data, X_dim=(128, 128, 1)):
    X_train = np.stack(data[data.fold != fold_k].melspectrogram.to_numpy())
    X_test = np.stack(data[data.fold == fold_k].melspectrogram.to_numpy())

    y_train = data[data.fold != fold_k].label.to_numpy()
    y_test = data[data.fold == fold_k].label.to_numpy()

    XX_train = X_train.reshape(X_train.shape[0], *X_dim)
    XX_test = X_test.reshape(X_test.shape[0], *X_dim)

    yy_train = to_categorical(y_train)
    yy_test = to_categorical(y_test)

    return XX_train, XX_test, yy_train, yy_test
# train the model on one fold
def process_fold(fold_k, data, epochs=100, num_batch_size=32):
    # split the data
    X_train, X_test, y_train, y_test = train_test_split(fold_k, data)

    # init data augmentation
    train_datagen, val_datagen = init_data_aug()

    # fit augmentation
    train_datagen.fit(X_train)
    val_datagen.fit(X_train)

    # init model
    model = init_model()

    # pre-training accuracy
    score = model.evaluate(val_datagen.flow(X_test, y_test, batch_size=num_batch_size), verbose=0)
    print("Pre-training accuracy: %.4f%%\n" % (100 * score[1]))

    # train the model
    start = datetime.now()
    history = model.fit(train_datagen.flow(X_train, y_train, batch_size=num_batch_size),
                        steps_per_epoch=len(X_train) / num_batch_size,
                        epochs=epochs,
                        validation_data=val_datagen.flow(X_test, y_test, batch_size=num_batch_size))
    end = datetime.now()
    print("Training completed in time: ", end - start, '\n')

    return history
def show_results(tot_history):
    """Show accuracy and loss graphs for train and test sets."""
    for i, history in enumerate(tot_history):
        print('\n({})'.format(i + 1))

        plt.figure(figsize=(15, 5))

        plt.subplot(121)
        plt.plot(history.history['accuracy'])
        plt.plot(history.history['val_accuracy'])
        plt.grid(linestyle='--')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['train', 'validation'], loc='upper left')

        plt.subplot(122)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.grid(linestyle='--')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['train', 'validation'], loc='upper left')

        plt.show()

        print('\tMax validation accuracy: %.4f %%' % (np.max(history.history['val_accuracy']) * 100))
        print('\tMin validation loss: %.5f' % np.min(history.history['val_loss']))
Train the model
FOLD_K = 10
REPEAT = 1
history10 = []
for i in range(REPEAT):
    print('-' * 80)
    print("\n({})\n".format(i + 1))

    history = process_fold(FOLD_K, us8k_df, epochs=100)
    history10.append(history)
########OR########
# reuse the parts above, without the process_fold wrapper
#Xz_train, Xz_test, yz_train, yz_test = train_test_split(FOLD_K, us8k_df)
#train_datagenz, val_datagenz = init_data_aug()
#train_datagenz.fit(Xz_train)
#val_datagenz.fit(Xz_train)
#model = init_model()
# pre-training accuracy
#score = model.evaluate(val_datagenz.flow(Xz_test, yz_test, batch_size=32), verbose=0)
#print("Pre-training accuracy: %.4f%%\n" % (100 * score[1]))
# train the model
#start = datetime.now()
#the_model = model.fit(train_datagenz.flow(Xz_train, yz_train, batch_size=32),
# steps_per_epoch=len(Xz_train) / 32,
# epochs=100,
# validation_data=val_datagenz.flow(Xz_test, yz_test, batch_size=32))
#end = datetime.now()
#print("Training completed in time: ", end - start, '\n')
Save the model, then convert and optimize it with TFLite
export_dir = '/content/gdrive/MyDrive/initial-model'
tf.saved_model.save(model, export_dir)
# Convert the model.
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()
# Save the converted model
tflite_models_dir = pathlib.Path("/content/gdrive/MyDrive/tiny-model")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"model-lite.tflite"
tflite_model_file.write_bytes(tflite_model)
# Optimize and save the model
# Convert the model using DEFAULT optimizations: https://github.com/tensorflow/tensorflow/blob/v2.4.1/tensorflow/lite/python/lite.py#L91-L130
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()
tflite_model_quant_file = tflite_models_dir / "model-tiny.tflite"
tflite_model_quant_file.write_bytes(tflite_quant_model)
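A quick way to check that the optimization actually changed something is to compare the two file sizes (sketch):

import os
# sketch: size of the plain vs. optimized TFLite files
print("float model:     %d bytes" % os.path.getsize(tflite_model_file))
print("quantized model: %d bytes" % os.path.getsize(tflite_model_quant_file))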
# Load TFLite model and allocate tensors.
tflite_model_file = '/content/gdrive/MyDrive/tiny-model/model-tiny.tflite' # Change the filename here for Model 2 and 3
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]
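The interpreter's input/output details can also be printed to confirm what it expects (sketch):

# sketch: confirm the expected input/output shape and dtype
print(interpreter.get_input_details()[0]['shape'])   # expecting [1, 128, 128, 1]
print(interpreter.get_input_details()[0]['dtype'])   # expecting float32 for this model
print(interpreter.get_output_details()[0]['shape'])  # expecting [1, 10]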
Bad reuse of the data
# split the data again (only the test fold and its labels are used here)
ignore_1, the_data, ignore_2, the_label = train_test_split(FOLD_K, us8k_df)
Validate the new tiny model
predictions = []
test_labels, test_sound = [], []
for a in range(the_label.shape[0]):
    # slicing the data
    this_data = the_data[a]
    this_data = np.expand_dims(this_data, axis=0)
    this_label = the_label[a]

    ####### Solution from: https://stackoverflow.com/questions/67068742/valueerror-cannot-set-tensor-dimension-mismatch-got-3-but-expected-4-for-inpu
    interpreter.set_tensor(input_index, this_data)
    #######
    interpreter.invoke()

    predictions.append(interpreter.get_tensor(output_index))
    test_labels.append(this_label)
Score the data
# the badly done one
score = 0
for item in range(the_label.shape[0]):
    # prediction = np.argmax(predictions[item])
    prediction = predictions[item]
    label = test_labels[item]

    result_and = np.logical_and(label, prediction)
    # if prediction == label:
    if result_and.any():
        score = score + 1

print("Out of ", the_label.shape[0], " predictions I got " + str(score) + " correct")
# Correct one?
the_score = 0
for item in range(the_label.shape[0]):
    prediction = np.argmax(predictions[item])
    label = np.argmax(test_labels[item])

    print(prediction)
    print(label)
    print("-----------------------")

    if label == prediction:
        the_score = the_score + 1

print("Out of ", the_label.shape[0], " predictions I got " + str(the_score) + " correct")
I adapted the code, unwrapped/broke apart the functions, and searched for my problems and errors on Google and Stack Overflow.
Can you help me?