TFLite quantized MobileNetV2 classifier not working

My goal is to convert a PyTorch Model into a quantized tflite model that can be used for inference on the Edge TPU.

I was able to convert a fairly complex depth estimation model from PyTorch to tflite and I successfully ran it on the Edge TPU. But because not all operations were supported, inference was pretty slow (>800ms).

Number of operations that will run on Edge TPU: 87
Number of operations that will run on CPU: 47

[Depth estimation result image]

Because I want a model that runs fully on the TPU, I tried converting the simplest model I could think of, a MobilenetV2 classification model. But when running the quantized model, I get strangely inaccurate results.

PyTorch                      TFLite
Samoyed: 0.8303              missile: 0.184565
Pomeranian: 0.06989          kuvasz: 0.184565
keeshond: 0.01296            stupa: 0.184565
collie: 0.0108               Samoyed: 0.184565
Great Pyrenees: 0.00989      Arctic fox: 0.184565

Is this caused by quantizing the model from float32 to uint8, or am I doing something wrong? And if it is caused by quantization, how can I mitigate it? The classification example from Coral works fine and, as far as I know, it uses the same model.
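
One way to narrow this down: openvino2tensorflow can also emit a non-quantized float32 .tflite alongside the quantized one, and running both with the same preprocessing shows whether accuracy is lost somewhere in the conversion chain or only in the uint8 quantization step. A minimal sketch (the float model's file name and the plain 224x224 resize are my assumptions, not from the post):

import numpy as np
from PIL import Image
from tensorflow.lite.python.interpreter import Interpreter

def top5(model_path, image, mean, std):
    # run one image through a tflite model and return the top-5 class ids
    interpreter = Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    inp = interpreter.get_input_details()[0]
    x = (np.asarray(image, dtype=np.float32) - mean) / std
    if inp['dtype'] == np.uint8:
        # quantize for the uint8 model; the float model takes x directly
        scale, zero_point = inp['quantization']
        x = np.clip(x / scale + zero_point, 0, 255).astype(np.uint8)
    interpreter.set_tensor(inp['index'], x[np.newaxis, ...])
    interpreter.invoke()
    scores = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
    return np.argsort(scores.flatten())[::-1][:5]

img = Image.open('data/dog.jpg').convert('RGB').resize((224, 224))
# 'model_float32.tflite' is a placeholder name for the float model that
# openvino2tensorflow emits next to the quantized one
print(top5('models/model_float32.tflite', img, 114.0, 57.0))
print(top5('models/mobilenetv2.tflite', img, 114.0, 57.0))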

Conversion Process

PyTorch -> ONNX -> OpenVINO -> TensorFlow -> TensorFlowLite

I wrote my own code to convert the model from PyTorch to ONNX and from TensorFlow (pb) to TFLite. For the other conversion steps, I used the OpenVINO mo.py script and the openvino2tensorflow tool, because of the NCHW/NHWC layout mismatch between PyTorch and TensorFlow.
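
For context, the two hand-written steps usually look something like the following. This is only a sketch of the standard torch.onnx.export and tf.lite.TFLiteConverter APIs, not the exact code from the post; file names and the representative dataset are placeholders:

import numpy as np
import tensorflow as tf
import torch
import torchvision

# step 1: PyTorch -> ONNX (NCHW input, as PyTorch expects)
model = torchvision.models.mobilenet_v2(pretrained=True).eval()
dummy = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy, 'mobilenetv2.onnx', opset_version=11)

# step 2: SavedModel (from openvino2tensorflow) -> full-integer uint8 tflite;
# the representative dataset calibrates the quantization ranges
def representative_dataset():
    for _ in range(100):
        # placeholder: real, correctly normalized calibration images belong
        # here; unrepresentative data yields bad quantization ranges
        yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model('saved_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
open('mobilenetv2.tflite', 'wb').write(converter.convert())

If the calibration data in this step does not match the distribution of real inputs, a fully quantized classifier can degrade in exactly the way shown above, so it is worth double-checking.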

Downloads

Depth Estimation Model: https://github.com/AaronZettler/miscellaneous/blob/master/mobilenet_v2_depth_est.pth?raw=true

Classification Model: https://github.com/AaronZettler/miscellaneous/blob/master/mobilenetv2.tflite?raw=true

Labels: https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

Image: https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg

Code

This code does not require the Edge TPU to run, but it does require the Google Coral libraries. If I use different parameters for mean and std, like (2.0, 76.0), I get a solid result for the dog.jpg image, but if I try to classify something else, I have the same problem.
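
For reference, the torchvision ImageNet normalization translates into 0-255 pixel units as below; the scalar pair (114.0, 57.0) used here is roughly the channel average, which already loses some per-channel precision:

import numpy as np

mean = np.array([0.485, 0.456, 0.406]) * 255  # [123.7, 116.3, 103.5]
std  = np.array([0.229, 0.224, 0.225]) * 255  # [ 58.4,  57.1,  57.4]
print(mean.mean(), std.mean())  # ~114.5, ~57.6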


import numpy as np
from PIL import Image
from pycoral.adapters import classify
from pycoral.adapters import common
from pycoral.utils.dataset import read_label_file

from tensorflow.lite.python.interpreter import Interpreter


def cropPIL(image, new_width, new_height):
    # center-crop a PIL image to (new_width, new_height)
    width, height = image.size

    left = (width - new_width)/2
    top = (height - new_height)/2
    right = (width + new_width)/2
    bottom = (height + new_height)/2

    return image.crop((left, top, right, bottom))

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def classify_img(image_dir, labels_dir, model_dir, mean, std):
    # load labels and model
    labels = read_label_file(labels_dir)
    interpreter = Interpreter(model_path=model_dir)
    interpreter.allocate_tensors()
    
    # load the image, scale its height to 256 and center-crop to 224x224
    size = (256, 256)
    image = Image.open(image_dir).convert('RGB')
    image = image.resize((int(size[0] * image.width / image.height), size[1]), Image.LANCZOS)  # LANCZOS == old ANTIALIAS
    image = cropPIL(image, 224, 224)
    image = np.asarray(image)

    # normalize and quantize the input: the interpreter expects
    # q = (pixel - mean) / (std * scale) + zero_point
    params = common.input_details(interpreter, 'quantization_parameters')
    scale = params['scales']
    zero_point = params['zero_points']

    normalized_input = (image - mean) / (std * scale) + zero_point
    np.clip(normalized_input, 0, 255, out=normalized_input)

    # set the quantized image as the input tensor
    common.set_input(interpreter, normalized_input.astype(np.uint8))

    # run inference
    interpreter.invoke()

    # get the output tensor and apply softmax
    output_details = interpreter.get_output_details()[0]
    output_data = interpreter.tensor(output_details['index'])().flatten()
    scores = softmax(output_data.astype(float))

    # get the top 5 classes
    classes = classify.get_classes_from_scores(scores, 5, 0.0)

    print('-------RESULTS--------')
    for c in classes:
        print('%s: %f' % (labels.get(c.id, c.id), c.score))


image_dir  = 'data/dog.jpg'
labels_dir = 'data/imagenet_classes.txt'
model_dir  = 'models/mobilenetv2.tflite'

classify_img(image_dir, labels_dir, model_dir, 114.0, 57.0)
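
One detail worth flagging in the script above (my observation, not part of the original post): the output tensor of a fully quantized model is itself uint8, so applying softmax to the raw values ignores the output scale and zero point. Dequantizing first, e.g. as sketched below inside classify_img, changes the reported probabilities, though not the ranking:

# dequantize the uint8 logits before softmax
output_details = interpreter.get_output_details()[0]
out_scale, out_zero_point = output_details['quantization']
output_data = interpreter.tensor(output_details['index'])().flatten()
scores = softmax((output_data.astype(np.float32) - out_zero_point) * out_scale)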

To run the PyTorch model on Google Colab, I had to replace

model = torch.hub.load('pytorch/vision:v0.9.0', 'mobilenet_v2', pretrained=True)

with

model = torchvision.models.mobilenet_v2(pretrained=True)

to make it work.

This is the code I used to test the PyTorch model on my machine.

import torch
from PIL import Image
from torchvision import transforms
import torchvision

def inference(model, input_image, labels_dir):
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        output = model(input_batch)

    probabilities = torch.nn.functional.softmax(output[0], dim=0)

    # Read the categories
    with open(labels_dir, "r") as f:
        categories = [s.strip() for s in f.readlines()]

    # Show top categories per image
    top5_prob, top5_catid = torch.topk(probabilities, 5)
    result = {}
    for i in range(top5_prob.size(0)):
        result[categories[top5_catid[i]]] = top5_prob[i].item()
    return result

def classify(image_dir, labels_dir):
    model = torchvision.models.mobilenet_v2(pretrained=True)
    model.eval()

    im = Image.open(image_dir)
    results = inference(model, im, labels_dir)
    for result in results:
        print(f'{result}: {round(results[result], 5)}')


classify('data/dog.jpg', 'data/imagenet_classes.txt')

Answer

The EdgeTPU does not support PReLU (LeakyReLU), so those operations must be replaced with supported ones. As of openvino2tensorflow v1.20.4, this replacement happens automatically during conversion, so PReLU now maps to the EdgeTPU. However, because the model is large, not all operations fit in the EdgeTPU's RAM; the part that does not fit is offloaded to the CPU for inference, which is very slow. In this case, inference on the CPU alone is 4 to 5 times faster.
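
The replacement relies on the standard identity PReLU(x) = max(x, 0) + alpha * min(x, 0), which uses only operations the EdgeTPU compiler can map. A quick numpy check of the identity (not the converter's actual code):

import numpy as np

def prelu(x, alpha):
    # reference PReLU: slope alpha on the negative part
    return np.where(x > 0, x, alpha * x)

def prelu_decomposed(x, alpha):
    # Maximum (ReLU) + Minimum + Mul + Add, as in the converted graph
    return np.maximum(x, 0) + alpha * np.minimum(x, 0)

x = np.linspace(-3.0, 3.0, 7)
assert np.allclose(prelu(x, 0.25), prelu_decomposed(x, 0.25))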

# start the openvino2tensorflow container
docker run --gpus all -it --rm \
-v `pwd`:/home/user/workdir \
pinto0309/openvino2tensorflow:latest

cd workdir

MODEL=depth_estimation_mbnv2

# 180x320 variant: ONNX -> OpenVINO IR (FP32 and FP16)
H=180
W=320
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP32 \
--output_dir ${H}x${W}/openvino/FP32
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP16 \
--output_dir ${H}x${W}/openvino/FP16
# compile an FP16 blob for the Myriad VPU as well
mkdir -p ${H}x${W}/openvino/myriad
${INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/lib/intel64/myriad_compile \
-m ${H}x${W}/openvino/FP16/${MODEL}_${H}x${W}.xml \
-ip U8 \
-VPU_NUMBER_OF_SHAVES 4 \
-VPU_NUMBER_OF_CMX_SLICES 4 \
-o ${H}x${W}/openvino/myriad/${MODEL}_${H}x${W}.blob

# OpenVINO IR -> SavedModel plus float/quantized tflite (and other) outputs
openvino2tensorflow \
--model_path ${H}x${W}/openvino/FP32/${MODEL}_${H}x${W}.xml \
--output_saved_model \
--output_pb \
--output_no_quant_float32_tflite \
--output_weight_quant_tflite \
--output_float16_quant_tflite \
--output_integer_quant_tflite \
--string_formulas_for_normalization 'data / 255' \
--output_integer_quant_type 'uint8' \
--output_tfjs \
--output_coreml \
--output_tftrt
mv saved_model saved_model_${H}x${W}

# second pass: generate the full-integer and EdgeTPU-compiled tflite models
openvino2tensorflow \
--model_path ${H}x${W}/openvino/FP32/${MODEL}_${H}x${W}.xml \
--output_saved_model \
--output_pb \
--output_edgetpu \
--string_formulas_for_normalization 'data / 255' \
--output_integer_quant_type 'uint8'
mv saved_model/model_full_integer_quant.tflite saved_model_${H}x${W}/model_full_integer_quant.tflite
mv saved_model/model_full_integer_quant_edgetpu.tflite saved_model_${H}x${W}/model_full_integer_quant_edgetpu.tflite

mv ${H}x${W}/openvino saved_model_${H}x${W}/openvino
mv ${MODEL}_${H}x${W}.onnx saved_model_${H}x${W}/${MODEL}_${H}x${W}.onnx


# repeat the same steps for the 240x320 variant
H=240
W=320
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP32 \
--output_dir ${H}x${W}/openvino/FP32
$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \
--input_model ${MODEL}_${H}x${W}.onnx \
--data_type FP16 \
--output_dir ${H}x${W}/openvino/FP16
mkdir -p ${H}x${W}/openvino/myriad
${INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/lib/intel64/myriad_compile \
-m ${H}x${W}/openvino/FP16/${MODEL}_${H}x${W}.xml \
-ip U8 \
-VPU_NUMBER_OF_SHAVES 4 \
-VPU_NUMBER_OF_CMX_SLICES 4 \
-o ${H}x${W}/openvino/myriad/${MODEL}_${H}x${W}.blob

openvino2tensorflow \
--model_path ${H}x${W}/openvino/FP32/${MODEL}_${H}x${W}.xml \
--output_saved_model \
--output_pb \
--output_no_quant_float32_tflite \
--output_weight_quant_tflite \
--output_float16_quant_tflite \
--output_integer_quant_tflite \
--string_formulas_for_normalization 'data / 255' \
--output_integer_quant_type 'uint8' \
--output_tfjs \
--output_coreml \
--output_tftrt
mv saved_model saved_model_${H}x${W}

openvino2tensorflow \
--model_path ${H}x${W}/openvino/FP32/${MODEL}_${H}x${W}.xml \
--output_saved_model \
--output_pb \
--output_edgetpu \
--string_formulas_for_normalization 'data / 255' \
--output_integer_quant_type 'uint8'
mv saved_model/model_full_integer_quant.tflite saved_model_${H}x${W}/model_full_integer_quant.tflite
mv saved_model/model_full_integer_quant_edgetpu.tflite saved_model_${H}x${W}/model_full_integer_quant_edgetpu.tflite

mv ${H}x${W}/openvino saved_model_${H}x${W}/openvino
mv ${MODEL}_${H}x${W}.onnx saved_model_${H}x${W}/${MODEL}_${H}x${W}.onnx

  • PReLU (LeakyReLU) replaced by Maximum (ReLU), Minimum, Mul, Add
    [before/after graph images]
  • EdgeTPU model
    [EdgeTPU model graph images]