TypeError thrown when running TensorFlow-TensorRT (TF-TRT) inference model

  File "resnet_tftrt.py", line 50, in <module>
    predictions = trt_model(tf.constant(inputs))  # Use TensorRT model for inference
TypeError: '_UserObject' object is not callable

I'm getting this error while running TF-TRT code with FP16 precision. I'm providing the code below; it is a simple image classification problem using the pretrained ResNet-50 model.

import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
from tensorflow.keras.models import load_model, save_model
import time
import numpy as np
# Load or create a Keras model
model = load_model('resnet50_model_custom_data_tf.h5')
# Specify the directory where you want to save the model in the SavedModel format
saved_model_path = 'saved model path'
# Save the Keras model as a SavedModel
save_model(model, saved_model_path)
# Convert the SavedModel to the TensorRT format
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode='FP16',
    max_workspace_size_bytes=1 << 25,
    maximum_cached_engines=100
)
# Use the full path to the saved model directory
input_saved_model_dir = saved_model_path
converter = trt.TrtGraphConverterV2(
    input_saved_model_dir=input_saved_model_dir,
    conversion_params=conversion_params
)
converter.convert()
converter.save(output_saved_model_dir='trt_model')
# Load the converted TensorRT model
trt_model = tf.saved_model.load('trt_model')
# Load test data for accuracy calculation
test_dir = 'test dir'
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    shuffle=False  # Important: Set shuffle to False for accurate predictions
)

# Perform inference and calculate accuracy
correct_predictions = 0
total_samples = 0
start_time = time.time()
for batch in test_generator:
    inputs, labels = batch
    predictions = trt_model(tf.constant(inputs))  # This is where the TypeError above is raised
    predicted_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(labels, axis=1)
    correct_predictions += np.sum(predicted_labels == true_labels)
    total_samples += len(labels)
    # Break the loop if all samples have been processed
    if total_samples >= len(test_generator.filenames):
        break
end_time = time.time()
# Calculate accuracy
accuracy = correct_predictions / total_samples
print("Accuracy (TensorRT): {:.2%}".format(accuracy))
# Calculate throughput and execution time
throughput = total_samples / (end_time - start_time)
print("Throughput (TensorRT): {:.2f} samples per second".format(throughput))
print("Total Execution Time (TensorRT): {:.4f} seconds".format(end_time - start_time))

I want to check the throughput, accuracy, and latency of the TF-TRT converted model.
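
For latency specifically, I'm planning to time each batch individually instead of only the total run. A minimal sketch of what I have in mind, assuming the signature-based call from the previous snippet works:

# Sketch: per-batch latency, assuming the signature-based call above works.
# The first calls typically trigger TensorRT engine building, so a few
# warm-up batches are excluded from the measurement.
infer = trt_model.signatures['serving_default']
warmup_inputs, _ = next(test_generator)
for _ in range(3):
    infer(tf.constant(warmup_inputs, dtype=tf.float32))

latencies = []
for _ in range(len(test_generator)):
    inputs, _ = next(test_generator)
    t0 = time.time()
    infer(tf.constant(inputs, dtype=tf.float32))
    latencies.append(time.time() - t0)
print("Mean latency per batch: {:.4f} s".format(np.mean(latencies)))
print("P95 latency per batch:  {:.4f} s".format(np.percentile(latencies, 95)))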
