The code below automatically downloads the necessary model files and weights on Google Colab, but not on Windows.
Any ideas on how to fix this?
import gradio as gr
from diffusers import DiffusionPipeline
import torch
import base64
from io import BytesIO
import os
import gc
from share_btn import community_icon_html, loading_icon_html, share_js
# SDXL code: https://github.com/huggingface/diffusers/pull/3859
# Decide where the SDXL checkpoints come from: a local directory when
# SDXL_MODEL_DIR is set, otherwise the Hugging Face Hub repo ids.
model_dir = os.getenv("SDXL_MODEL_DIR")
access_token = os.getenv("my_token")

model_key_base = (
    os.path.join(model_dir, "stable-diffusion-xl-base-0.9")
    if model_dir
    else "nichijoufan777/stable-diffusion-xl-base-0.9"
)
model_key_refiner = (
    os.path.join(model_dir, "stable-diffusion-xl-refiner-0.9")
    if model_dir
    else "nichijoufan777/stable-diffusion-xl-refiner-0.9"
)
def _truthy_env(name: str, default: str) -> bool:
    """Return True when the environment variable equals "true" (case-insensitive)."""
    return os.getenv(name, default).lower() == "true"


# Run the refiner pass after the base model (enabled by default).
enable_refiner = _truthy_env("ENABLE_REFINER", "true")
# Also emit the intermediate images produced before the refiner runs.
output_images_before_refiner = _truthy_env("OUTPUT_IMAGES_BEFORE_REFINER", "false")
# Ask gradio to create a public share link.
share = _truthy_env("SHARE", "false")
# Load the SDXL base pipeline in fp16 from safetensors weights.
print(f"Loading model {model_key_base}")
pipe = DiffusionPipeline.from_pretrained(
    model_key_base,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
    use_auth_token=access_token,
)
# Offload sub-models to CPU when idle to fit in limited VRAM.
pipe.enable_model_cpu_offload()
# Alternative: keep everything resident on the GPU instead of offloading.
# pipe.to("cuda")
# If using torch < 2.0:
# pipe.enable_xformers_memory_efficient_attention()
# pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
if enable_refiner:
    # Load the SDXL refiner pipeline with the same fp16/safetensors settings
    # as the base model.
    print(f"Loading model {model_key_refiner}")
    pipe_refiner = DiffusionPipeline.from_pretrained(
        model_key_refiner,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
        use_auth_token=access_token,
    )
    # Offload sub-models to CPU when idle to fit in limited VRAM.
    pipe_refiner.enable_model_cpu_offload()
    # Alternative: keep everything resident on the GPU instead of offloading.
    # pipe_refiner.to("cuda")
    # If using torch < 2.0:
    # pipe_refiner.enable_xformers_memory_efficient_attention()
    # pipe_refiner.unet = torch.compile(pipe_refiner.unet, mode="reduce-overhead", fullgraph=True)
# NOTE: we do not have word list filtering in this gradio demo
# Module-level flag tracking whether the GPU is currently in use;
# presumably toggled by the generation handler further down in the
# file (not visible in this chunk) — TODO confirm.
is_gpu_busy = False
.
.
.