Use torch-tensorrt with diffusers library

72 Views Asked by At

I'm trying to use torch-tensorrt with the diffusers library to speed up inference of my diffusion models. I started with a toy example just to see whether it works.

from diffusion_models import UNet2DModel
from diffusers import DDPMScheduler, ScoreSdeVeScheduler
from diffusion_inference import DDPMInference, ScoreSdeVeInference
import time
import argparse
import torch
import torch_tensorrt

# Device selection: run on the first CUDA GPU when one is available (and the
# CPU is not being forced), otherwise fall back to the CPU.
force_cpu = False
useCuda = torch.cuda.is_available() and not force_cpu
print('Using CUDA.' if useCuda else 'No CUDA available.')

# Legacy typed-tensor classes matching the chosen device; torch.cuda exposes
# the GPU variants under the same attribute names as the top-level CPU ones.
_tensor_types = torch.cuda if useCuda else torch
dtype = _tensor_types.FloatTensor
ltype = _tensor_types.LongTensor

#MT: add
device = torch.device("cuda:0" if useCuda else "cpu")

# Keyword arguments for the UNet, collected in one place so the architecture
# can be read at a glance. Both block-type tuples have six entries, matching
# the six entries of block_out_channels; attention is used in the fifth down
# block and the second up block.
unet_config = {
    # WARNING: This is only for square matrices, need to find a solution in case not square
    "sample_size": 128,  # the target image resolution
    "in_channels": 2,  # the number of input channels, 3 for RGB images
    "out_channels": 1,  # the number of output channels
    "layers_per_block": 2,  # how many ResNet layers to use per UNet block
    # the number of output channels for each UNet block
    "block_out_channels": (128, 128, 256, 256, 512, 512),
    "down_block_types": (
        'DownBlock2D',
        'DownBlock2D',
        'DownBlock2D',
        'DownBlock2D',
        'AttnDownBlock2D',
        'DownBlock2D',
    ),
    "up_block_types": (
        'UpBlock2D',
        'AttnUpBlock2D',
        'UpBlock2D',
        'UpBlock2D',
        'UpBlock2D',
        'UpBlock2D',
    ),
}

model = UNet2DModel(**unet_config)

# Example input handed to the compiler: one random batch of the given shape,
# created on the CPU and then moved to the selected device.
# NOTE(review): the channel dimension here is 1 while the model above is
# built with in_channels=2 -- confirm this is intended.
input_shape = (8, 1, 128, 128)
example_batch = torch.randn(input_shape).to(device)
input_data = [example_batch]

# Move the network to the same device and switch it to eval mode before
# compiling.
model = model.to(device)
model.eval()

# NOTE(review): according to the traceback reported for this script, this
# call goes through torch.jit.script(module) inside torch_tensorrt, and that
# is where the RuntimeError is raised.
compile_options = {
    "inputs": input_data,
    "workspace_size": 20 << 30,  # 20 * 2**30; presumably bytes (20 GiB) -- confirm
    "enabled_precisions": {torch.float},
}
model = torch_tensorrt.compile(model, **compile_options)

# Build the sampler under test from the (possibly compiled) model and a
# 10-timestep scheduler.
# NOTE(review): the third positional argument (10) presumably is the number of
# inference steps -- confirm against ScoreSdeVeInference.
noise_scheduler = ScoreSdeVeScheduler(num_train_timesteps=10)
diffusion_inference = ScoreSdeVeInference(model, noise_scheduler, 10)

# Throughput benchmark: a few untimed warm-up calls, then num_iterations timed
# calls whose durations are summed.
num_warmup = 4
num_iterations = 4
total_time = 0.0

# CUDA work is queued asynchronously, so the host clock must only be read
# after the device has finished; otherwise the measured interval can exclude
# GPU work that is still in flight.
sync_needed = device.type == "cuda"

with torch.no_grad():
    input_data = torch.randn(input_shape).to(device).type(dtype)

    # warmup
    for _ in range(num_warmup):
        output_data = diffusion_inference.inference(input_data, device=device)

    for _ in range(num_iterations):
        if sync_needed:
            # Drain pending work so it is not billed to this iteration.
            torch.cuda.synchronize()
        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()
        output_data = diffusion_inference.inference(input_data, device=device)
        if sync_needed:
            # Wait for this iteration's GPU work before stopping the clock.
            torch.cuda.synchronize()
        end_time = time.perf_counter()
        total_time += end_time - start_time

# Calls per second, averaged over the timed iterations.
pytorch_fps = num_iterations / total_time
print(f"PyTorch FPS: {pytorch_fps:.2f}")

But when I run this code, I get the following error:

  File "/xxx/code/x-test10.py", line 42, in <module>
    model = torch_tensorrt.compile(
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch_tensorrt/_compile.py", line 132, in compile
    ts_mod = torch.jit.script(module)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_script.py", line 1284, in script
    return torch.jit._recursive.create_script_module(
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 480, in create_script_module
    return create_script_module_impl(nn_module, concrete_type, stubs_fn)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 542, in create_script_module_impl
    script_module = torch.jit.RecursiveScriptModule._construct(cpp_module, init_fn)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_script.py", line 614, in _construct
    init_fn(script_module)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 520, in init_fn
    scripted = create_script_module_impl(orig_value, sub_concrete_type, stubs_fn)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 546, in create_script_module_impl
    create_methods_and_properties_from_stubs(concrete_type, method_stubs, property_stubs)
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/torch/jit/_recursive.py", line 397, in create_methods_and_properties_from_stubs
    concrete_type._create_methods_and_properties(property_defs, property_rcbs, method_defs, method_rcbs, method_defaults)
RuntimeError: 

get_timestep_embedding(Tensor timesteps, int embedding_dim, bool flip_sin_to_cos=False, float downscale_freq_shift=1., float scale=1., int max_period=10000) -> Tensor:
Expected a value of type 'float' for argument 'downscale_freq_shift' but instead found type 'int'.
:
  File "/xxx/exampletensorrt/lib/python3.10/site-packages/diffusers/models/embeddings.py", line 248
    def forward(self, timesteps):
        t_emb = get_timestep_embedding(
                ~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
            timesteps,
            self.num_channels,

This error does not occur if I remove the torch_tensorrt.compile call. Do you know what is happening here? Is there a way to use torch-tensorrt with diffusers?

Here are the full details on my setup.

Libraries:

certifi                  2023.11.17
charset-normalizer       3.3.2
cmake                    3.28.1
diffusers                0.25.1
filelock                 3.13.1
fsspec                   2023.12.2
huggingface-hub          0.20.3
idna                     3.6
importlib-metadata       7.0.1
Jinja2                   3.1.3
lit                      17.0.6
MarkupSafe               2.1.4
mpmath                   1.3.0
networkx                 3.2.1
numpy                    1.26.3
nvidia-cublas-cu11       11.10.3.66
nvidia-cublas-cu12       12.3.4.1
nvidia-cuda-cupti-cu11   11.7.101
nvidia-cuda-nvrtc-cu11   11.7.99
nvidia-cuda-nvrtc-cu12   12.3.107
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cuda-runtime-cu12 12.3.101
nvidia-cudnn-cu11        8.5.0.96
nvidia-cudnn-cu12        8.9.7.29
nvidia-cufft-cu11        10.9.0.58
nvidia-curand-cu11       10.2.10.91
nvidia-cusolver-cu11     11.4.0.1
nvidia-cusparse-cu11     11.7.4.91
nvidia-nccl-cu11         2.14.3
nvidia-nvtx-cu11         11.7.91
packaging                23.2
pillow                   10.2.0
pip                      23.3.2
PyYAML                   6.0.1
regex                    2023.12.25
requests                 2.31.0
safetensors              0.4.2
setuptools               59.6.0
sympy                    1.12
tensorrt                 8.6.1.post1
tensorrt-bindings        8.6.1
tensorrt-libs            8.6.1
torch                    2.0.1
torch-tensorrt           1.4.0
tqdm                     4.66.1
triton                   2.0.0
typing_extensions        4.9.0
urllib3                  2.1.0
wheel                    0.42.0
zipp                     3.17.0

GPU:

nvcc: NVIDIA (R) Cuda compiler driver
Cuda compilation tools, release 11.5, V11.5.119
Build cuda_11.5.r11.5/compiler.30672275_0
0

There are 0 best solutions below