Using MobileNetV2 on CIFAR-10, CIFAR-100, and ImageNet: accuracy is not high enough

228 Views Asked by At

I have been running some experiments with MobileNetV2 on the CIFAR-10 and CIFAR-100 datasets.

When I run the code below, the accuracy never gets above 80% (accuracy measured on the validation dataset).

Since MobileNetV2 was designed for the ImageNet dataset, whose images are typically fed in at approximately 224x224, I resized the CIFAR-10 and CIFAR-100 images to 96x96 using tf.image.resize(images, [96, 96]). But when I search for the accuracy reached with MobileNetV2 on CIFAR-10 and CIFAR-100, the reported results are usually above 90%.

  1. I want to know whether my code is wrong.
  2. When published results report an accuracy of 90%, which test dataset are they using? CIFAR-10 has a 50,000-image training set and a 10,000-image test set. Are they evaluating on those 10,000 test images?
  3. I have done no tuning in my code; it is only the model structure itself. Do I have to do some fine-tuning to reach a higher accuracy? If so, what kind of tuning should I do?
  4. I have also tried tf.keras.applications.MobileNetV2 as the model, but it gives me the same results: ~80% accuracy on CIFAR-10 and ~55% on CIFAR-100 (both resized to 96x96).

I'll attach the code just in case.

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import statistics

# Test accuracies to average; only a single value is recorded here.
test_list_acc = [0.7975999712944031]
mean_test_acc = statistics.mean(test_list_acc)
print(mean_test_acc)


# In[2]:


import os
# Configure TensorFlow's C++ log level before the tensorflow/keras imports
# that follow; '3' is expected to hide INFO, WARNING and ERROR messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# Clear the terminal. NOTE(review): "clear" is a POSIX shell command, so this
# presumably does nothing useful on Windows -- confirm the target platform.
os.system("clear")

from tensorflow import keras
from tensorflow import keras
from keras import layers, models, datasets, activations
from keras.layers import Conv2D, Dense, Dropout, Flatten
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from matplotlib import pyplot
from keras import datasets

from keras.layers.core import Dense, Dropout, Activation, Flatten
import numpy as np
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt


# In[3]:


from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ReLU, BatchNormalization, add,Softmax, AveragePooling2D, Dense, Input, GlobalAveragePooling2D
from tensorflow.keras.models import Model


# In[4]:


import os

import tensorflow as tf
from tensorflow.python.client import device_lib

# from tensorflow import datasets

# Restrict TensorFlow to the first GPU. This must run before the first device
# query below: the CUDA runtime reads CUDA_VISIBLE_DEVICES when it is
# initialised, so assigning it after the devices have been enumerated (as the
# original cell order did) has no effect on this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

print(tf.__version__)
# Enumerates the local devices; the return value is only displayed when this
# is the last expression of a notebook cell.
device_lib.list_local_devices()


# In[5]:


gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print(gpu_devices)
# Optional: uncomment so TensorFlow grows GPU memory on demand instead of
# reserving it up front (fails with IndexError when no GPU is visible).
#tf.config.experimental.set_memory_growth(gpu_devices[0], True)


# In[7]:


# Load the CIFAR-10 train/test splits, each as an (images, labels) pair.
cifar10_splits = keras.datasets.cifar10.load_data()
(train_images, train_labels), (test_images, test_labels) = cifar10_splits

# Report the shape of every array that was just loaded.
for caption, array in (
    ("Training Images Shape (x train shape) :", train_images),
    ("Label of training images (y train shape) :", train_labels),
    ("Test Images Shape (x test shape) :", test_images),
    ("Label of test images (y test shape) :", test_labels),
):
    print(caption, array.shape)


# In[8]:


# Resize every image to 96x96 with tf.image.resize. (An earlier attempt used
# np.reshape / ndarray.reshape to 224x224; a reshape cannot change the number
# of pixels, so it was abandoned.)
# Input sizes the author lists as available: 96, 128, 160, 192.
target_size = [96, 96]
train_images = tf.image.resize(train_images, target_size)
test_images = tf.image.resize(test_images, target_size)

# Report the shapes again after resizing (labels are unchanged).
for caption, array in (
    ("Training Images Shape (x train shape) :", train_images),
    ("Label of training images (y train shape) :", train_labels),
    ("Test Images Shape (x test shape) :", test_images),
    ("Label of test images (y test shape) :", test_labels),
):
    print(caption, array.shape)


# In[9]:


# Divide pixel values by 255 (presumably mapping 8-bit intensities into
# [0, 1] -- confirm the input range).
train_images = train_images / 255
test_images = test_images / 255


# In[10]:


def expansion_block(x, t, filters, block_id):
    """Apply the 1x1 expansion stage of a bottleneck.

    Runs a bias-free 1x1 convolution with ``t * filters`` output channels,
    followed by batch normalisation and a ReLU capped at 6.

    Args:
        x: Input Keras tensor.
        t: Expansion factor multiplied with ``filters``.
        filters: Base channel count to expand.
        block_id: Identifier used to build the layer names
            (``block_<id>_expand``, ``..._expand_bn``, ``..._expand_relu``).

    Returns:
        The expanded Keras tensor.
    """
    stage = f'block_{block_id}_expand'
    expanded = layers.Conv2D(
        t * filters, 1, padding='same', use_bias=False, name=stage
    )(x)
    expanded = layers.BatchNormalization(name=stage + '_bn')(expanded)
    return layers.ReLU(6, name=stage + '_relu')(expanded)

def depthwise_block(x, stride, block_id):
    """Apply the 3x3 depthwise stage of a bottleneck.

    Runs a bias-free 3x3 depthwise convolution with 'same' padding, followed
    by batch normalisation and a ReLU capped at 6.

    Args:
        x: Input Keras tensor.
        stride: Stride used along both spatial axes.
        block_id: Identifier used to build the layer names
            (``block_<id>_depthwise_conv``, ``..._dw_bn``, ``..._dw_relu``).

    Returns:
        The filtered Keras tensor.
    """
    tag = f'block_{block_id}_'
    depthwise = layers.DepthwiseConv2D(
        3,
        strides=(stride, stride),
        padding='same',
        use_bias=False,
        name=tag + 'depthwise_conv',
    )
    batch_norm = layers.BatchNormalization(name=tag + 'dw_bn')
    relu6 = layers.ReLU(6, name=tag + 'dw_relu')
    return relu6(batch_norm(depthwise(x)))

def projection_block(x, out_channels, block_id):
    """Apply the linear 1x1 projection stage of a bottleneck.

    Runs a bias-free 1x1 convolution down to ``out_channels`` followed by
    batch normalisation; no activation is applied afterwards.

    Args:
        x: Input Keras tensor.
        out_channels: Number of output channels of the projection.
        block_id: Identifier used to build the layer names
            (``block_<id>_compress`` and ``block_<id>_compress_bn``).

    Returns:
        The projected Keras tensor.
    """
    stage = 'block_%s_compress' % (block_id,)
    pointwise = layers.Conv2D(
        filters=out_channels,
        kernel_size=1,
        padding='same',
        use_bias=False,
        name=stage,
    )
    return layers.BatchNormalization(name=stage + '_bn')(pointwise(x))


# In[11]:


def Bottleneck(x,t,filters, out_channels,stride,block_id):
    """Build one inverted-residual bottleneck: expand, depthwise, project.

    Args:
        x: Input Keras tensor.
        t: Expansion factor handed to ``expansion_block``.
        filters: Base channel count handed to ``expansion_block`` (callers in
            this file pass the channel count of ``x``).
        out_channels: Channel count produced by the projection stage.
        stride: Stride of the depthwise convolution.
        block_id: Identifier used in the names of the layers of this block.

    Returns:
        The output Keras tensor, with ``x`` added as a residual shortcut when
        the block keeps both the spatial size and the channel count.
    """
    y = expansion_block(x, t, filters, block_id)
    y = depthwise_block(y, stride, block_id)
    y = projection_block(y, out_channels, block_id)
    # The shortcut is only valid when input and output have the same shape.
    # Checking the channel count alone is not enough: with stride != 1 the
    # spatial size changes and the element-wise add would fail.
    if stride == 1 and y.shape[-1] == x.shape[-1]:
        y = layers.add([x, y])
    return y


# In[14]:


def MobileNetV2(input_image = (96,96,3), n_classes=10):
    """Assemble a MobileNetV2-style classifier as a Keras functional model.

    Layout: a stride-2 3x3 stem convolution, a first block made of a depthwise
    and a projection stage only (no expansion), sixteen ``Bottleneck`` blocks
    (ids 2-17, expansion factor 6), a 1x1 convolution to 1280 channels, global
    average pooling and a softmax ``Dense`` head.

    Args:
        input_image: Shape tuple passed to ``keras.Input`` (presumably
            ``(height, width, channels)``).
        n_classes: Number of units of the softmax output layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    inputs = keras.Input(shape=input_image)

    # Stem: 3x3 convolution, stride 2, then BN + ReLU6.
    x = layers.Conv2D(
        32, kernel_size=3, strides=(2, 2), padding='same', use_bias=False
    )(inputs)
    x = layers.BatchNormalization(name='conv1_bn')(x)
    x = layers.ReLU(6, name='conv1_relu')(x)

    # Block 1 skips the expansion stage: depthwise + projection to 16 channels.
    x = depthwise_block(x, stride=1, block_id=1)
    x = projection_block(x, out_channels=16, block_id=1)

    # (out_channels, stride) for blocks 2..17, in order; every one of them
    # expands the channel count of its own input by a factor of 6.
    bottleneck_plan = (
        (24, 2), (24, 1),
        (32, 2), (32, 1), (32, 1),
        (64, 2), (64, 1), (64, 1), (64, 1),
        (96, 1), (96, 1), (96, 1),
        (160, 2), (160, 1), (160, 1),
        (320, 1),
    )
    for block_id, (out_channels, stride) in enumerate(bottleneck_plan, start=2):
        x = Bottleneck(
            x,
            t=6,
            filters=x.shape[-1],
            out_channels=out_channels,
            stride=stride,
            block_id=block_id,
        )

    # Final 1x1 convolution up to 1280 channels.
    x = layers.Conv2D(
        filters=1280, kernel_size=1, padding='same', use_bias=False,
        name='last_conv',
    )(x)
    x = layers.BatchNormalization(name='last_bn')(x)
    x = layers.ReLU(6, name='last_relu')(x)

    # Global average pooling over the remaining feature map. The five
    # stride-2 stages shrink the input 32x, so this is 7x7 for a 224x224
    # input and 3x3 for the default 96x96.
    x = layers.GlobalAveragePooling2D(name='global_average_pool')(x)

    outputs = layers.Dense(n_classes, activation='softmax')(x)

    return Model(inputs, outputs)


# In[15]:


# Build the network for 96x96 RGB inputs and 10 output classes, then print
# its layer summary.
input_shape = (96, 96, 3)
n_classes = 10

model = MobileNetV2(input_image=input_shape, n_classes=n_classes)
model.summary()


# In[16]:


# Adam with its default settings; the sparse cross-entropy loss is used
# because the labels are passed as loaded, without one-hot encoding.
model.compile(
    optimizer="Adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)


# In[17]:


# Train for 100 epochs with batches of 256 images.
# NOTE(review): the test split doubles as the validation data, so the
# validation metrics are not an independent held-out estimate.
validation_split = (test_images, test_labels)
hist = model.fit(
    train_images,
    train_labels,
    epochs=100,
    batch_size=256,
    validation_data=validation_split,
)


# In[18]:


# Evaluate the trained model on the test split and report loss and accuracy.
test_metrics = model.evaluate(test_images, test_labels, batch_size=256)
test_loss, test_acc = test_metrics
print("test loss : ", test_loss)
print("test acc  : ", test_acc)


# In[19]:


# Loss curve: training loss (black) against validation loss (green).
plt.figure(figsize=[6, 4])
for history_key, line_color in (('loss', 'black'), ('val_loss', 'green')):
    plt.plot(hist.history[history_key], line_color, linewidth=2.0)

# The legend labels which curve is which.
plt.legend(['Training Loss', 'validation Loss'], fontsize=14)
plt.xlabel('Epochs', fontsize=10)
plt.ylabel('Loss', fontsize=10)
plt.title('Loss Curves', fontsize=12)


# In[20]:


# Accuracy curve: training accuracy (black) against validation accuracy (blue).
plt.figure(figsize=[6, 4])
for history_key, line_color in (('accuracy', 'black'), ('val_accuracy', 'blue')):
    plt.plot(hist.history[history_key], line_color, linewidth=2.0)

# The legend labels which curve is which.
plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=14)
plt.xlabel('Epochs', fontsize=10)
plt.ylabel('Accuracy', fontsize=10)
plt.title('Accuracy Curves', fontsize=12)


# In[21]:


# Run the model on the test images. The output comes from the softmax head,
# so each row holds one score per class.
prediction = model.predict(test_images)
# A bare `prediction` / `prediction.shape` expression displays nothing here
# (it is not the last expression of the cell, and a script never echoes
# expressions), so print the shape explicitly.
print(prediction.shape)

# List the metric names recorded during training.
history_dict = hist.history
print(history_dict.keys())
1

There are 1 best solutions below

3
On

Why did you change the resolution of the input to 96x96 instead of 224x224, which is the resolution MobileNetV2 uses for the ImageNet dataset? Neural networks are normally optimized for a certain input size, so that might be the reason for the poor performance (at least that is what the question "Not getting Proper Accurcacy for cifar10 dadatset with mobilenetv2 on CPU" suggests). Another tuning step could be to change the batch size used in training, but without testing it myself I wouldn't bet on that being a fix.