I'm trying to use torch-tensorrt with the diffusers library to speed up inference on my diffusion models. I tried a toy example just to see if it works.
from diffusion_models import UNet2DModel
from diffusers import DDPMScheduler, ScoreSdeVeScheduler
from diffusion_inference import DDPMInference, ScoreSdeVeInference
import time
import argparse
import torch
import torch_tensorrt
# Manage GPU
force_cpu = False
useCuda = torch.cuda.is_available() and not force_cpu
if useCuda:
    print('Using CUDA.')
    dtype = torch.cuda.FloatTensor
    ltype = torch.cuda.LongTensor
    device = torch.device("cuda:0")  # MT: add
else:
    print('No CUDA available.')
    dtype = torch.FloatTensor
    ltype = torch.LongTensor
    device = torch.device("cpu")  # MT: add
model = UNet2DModel(
    # WARNING: this only handles square inputs; non-square sizes need another solution
    sample_size=128,  # the target image resolution
    in_channels=2,  # the number of input channels (3 for RGB images)
    out_channels=1,  # the number of output channels
    layers_per_block=2,  # how many ResNet layers to use per UNet block
    block_out_channels=(128, 128, 256, 256, 512, 512),  # the number of output channels for each UNet block
    down_block_types=('DownBlock2D', 'DownBlock2D', 'DownBlock2D', 'DownBlock2D', 'AttnDownBlock2D', 'DownBlock2D'),
    up_block_types=('UpBlock2D', 'AttnUpBlock2D', 'UpBlock2D', 'UpBlock2D', 'UpBlock2D', 'UpBlock2D'),
)
input_shape = (8, 1, 128, 128)
input_data = [torch.randn(input_shape).to(device)]
model = model.to(device)
model.eval()
model = torch_tensorrt.compile(
    model,
    inputs=input_data,
    workspace_size=20 << 30,
    enabled_precisions={torch.float},
)
noise_scheduler = ScoreSdeVeScheduler(num_train_timesteps=10)
diffusion_inference = ScoreSdeVeInference(model, noise_scheduler, 10)
num_iterations = 4
total_time = 0.0
with torch.no_grad():
    input_data = torch.randn(input_shape).to(device).type(dtype)
    # Warm-up runs (not timed)
    for i in range(4):
        output_data = diffusion_inference.inference(input_data, device=device)
    # Timed runs
    for i in range(num_iterations):
        start_time = time.time()
        output_data = diffusion_inference.inference(input_data, device=device)
        end_time = time.time()
        total_time += end_time - start_time
pytorch_fps = num_iterations / total_time
print(f"PyTorch FPS: {pytorch_fps:.2f}")
But when I run this code, I get the following error:
File "/xxx/code/x-test10.py", line 42, in <module>
model = torch_tensorrt.compile(
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch_tensorrt/_compile.py", line 132, in compile
ts_mod = torch.jit.script(module)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_script.py", line 1284, in script
return torch.jit._recursive.create_script_module(
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 480, in create_script_module
return create_script_module_impl(nn_module, concrete_type, stubs_fn)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 542, in create_script_module_impl
script_module = torch.jit.RecursiveScriptModule._construct(cpp_module, init_fn)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_script.py", line 614, in _construct
init_fn(script_module)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 520, in init_fn
scripted = create_script_module_impl(orig_value, sub_concrete_type, stubs_fn)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 546, in create_script_module_impl
create_methods_and_properties_from_stubs(concrete_type, method_stubs, property_stubs)
File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 397, in create_methods_and_properties_from_stubs
concrete_type._create_methods_and_properties(property_defs, property_rcbs, method_defs, method_rcbs, method_defaults)
RuntimeError:
get_timestep_embedding(Tensor timesteps, int embedding_dim, bool flip_sin_to_cos=False, float downscale_freq_shift=1., float scale=1., int max_period=10000) -> Tensor:
Expected a value of type 'float' for argument 'downscale_freq_shift' but instead found type 'int'.
:
File "/xxx/exampletensorrt/lib/python3.10/site-packages/diffusers/models/embeddings.py", line 248
def forward(self, timesteps):
t_emb = get_timestep_embedding(
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
timesteps,
self.num_channels,
This error doesn't appear if I remove the torch_tensorrt.compile call. Do you know what's happening here? Is there a way to use torch-tensorrt with diffusers?
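From the error message, my guess is that torch.jit.script infers the type of a module attribute from the value assigned in __init__, so an attribute that was given an int value (like a freq shift of 1) gets typed as int and no longer matches the float annotation on get_timestep_embedding. Here is a minimal, self-contained sketch of the pattern I think it's choking on (the names embed and TimeProj are hypothetical stand-ins mirroring diffusers' get_timestep_embedding/Timesteps, not the actual diffusers code):

import torch

# Stand-in for get_timestep_embedding: the parameter is annotated as float.
def embed(x: torch.Tensor, downscale_freq_shift: float = 1.0) -> torch.Tensor:
    return x + downscale_freq_shift

class TimeProj(torch.nn.Module):  # stand-in for diffusers' Timesteps module
    def __init__(self, downscale_freq_shift=1):  # note: int default
        super().__init__()
        # torch.jit.script infers this attribute's type as int
        # from the value assigned here
        self.downscale_freq_shift = downscale_freq_shift

    def forward(self, x):
        # int-typed attribute passed where the schema says float
        return embed(x, self.downscale_freq_shift)

torch.jit.script(TimeProj())
# RuntimeError: Expected a value of type 'float' for argument
# 'downscale_freq_shift' but instead found type 'int'.

If that's what's happening, would casting the stored values to float before compiling be enough? Something like this (an untested sketch; it assumes the offending attribute is downscale_freq_shift on one of the model's submodules):

# Untested workaround sketch: make the inferred attribute type float
# so it matches the schema before torch_tensorrt scripts the model.
for mod in model.modules():
    if hasattr(mod, "downscale_freq_shift"):
        mod.downscale_freq_shift = float(mod.downscale_freq_shift)

Or would scripting just fail on the next diffusers module that isn't TorchScript-friendly?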
Here are the full details on my setup.
Libraries:
certifi 2023.11.17
charset-normalizer 3.3.2
cmake 3.28.1
diffusers 0.25.1
filelock 3.13.1
fsspec 2023.12.2
huggingface-hub 0.20.3
idna 3.6
importlib-metadata 7.0.1
Jinja2 3.1.3
lit 17.0.6
MarkupSafe 2.1.4
mpmath 1.3.0
networkx 3.2.1
numpy 1.26.3
nvidia-cublas-cu11 11.10.3.66
nvidia-cublas-cu12 12.3.4.1
nvidia-cuda-cupti-cu11 11.7.101
nvidia-cuda-nvrtc-cu11 11.7.99
nvidia-cuda-nvrtc-cu12 12.3.107
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cuda-runtime-cu12 12.3.101
nvidia-cudnn-cu11 8.5.0.96
nvidia-cudnn-cu12 8.9.7.29
nvidia-cufft-cu11 10.9.0.58
nvidia-curand-cu11 10.2.10.91
nvidia-cusolver-cu11 11.4.0.1
nvidia-cusparse-cu11 11.7.4.91
nvidia-nccl-cu11 2.14.3
nvidia-nvtx-cu11 11.7.91
packaging 23.2
pillow 10.2.0
pip 23.3.2
PyYAML 6.0.1
regex 2023.12.25
requests 2.31.0
safetensors 0.4.2
setuptools 59.6.0
sympy 1.12
tensorrt 8.6.1.post1
tensorrt-bindings 8.6.1
tensorrt-libs 8.6.1
torch 2.0.1
torch-tensorrt 1.4.0
tqdm 4.66.1
triton 2.0.0
typing_extensions 4.9.0
urllib3 2.1.0
wheel 0.42.0
zipp 3.17.0
CUDA toolkit:
nvcc: NVIDIA (R) Cuda compiler driver
Cuda compilation tools, release 11.5, V11.5.119
Build cuda_11.5.r11.5/compiler.30672275_0