SDXL requires_aesthetics_score=True error

324 Views Asked by At

I'm trying to figure this one out, been at it for a while and can't seem to make any headway. Any help is greatly appreciated!!! It is my first time using Stable Diffusion so maybe I'm missing something here but I was trying to follow the HuggingFace tutorial https://huggingface.co/docs/diffusers/v0.20.0/en/api/pipelines/stable_diffusion/stable_diffusion_xl#1-ensemble-of-expert-denoisers

It was working before but now I'm trying to specify height and width...that seems to be when the problem started?

I've also tried setting requires_aesthetics_score=True before sending the refiner to cuda, but that doesn't work -- I get the same error.

ValueError                                Traceback (most recent call last)
Cell In[74], line 1
----> 1 refiner_image = refiner(
      2         prompt="cartoon of colorful monsters frolocking in a dark spooky graveyard with tombstones and graves behind a castle",
      3         num_inference_steps=n_steps,
      4         denoising_end=high_noise_frac,
      5         image=img
      6     ).images[0]

File c:\Users\Mark\anaconda3\envs\auto_content_creator\lib\site-packages\torch\utils\_contextlib.py:115, in context_decorator..decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File c:\Users\Mark\anaconda3\envs\auto_content_creator\lib\site-packages\diffusers\pipelines\stable_diffusion_xl\pipeline_stable_diffusion_xl_img2img.py:910, in StableDiffusionXLImg2ImgPipeline.__call__(self, prompt, prompt_2, image, strength, num_inference_steps, denoising_start, denoising_end, guidance_scale, negative_prompt, negative_prompt_2, num_images_per_prompt, eta, generator, latents, prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds, output_type, return_dict, callback, callback_steps, cross_attention_kwargs, guidance_rescale, original_size, crops_coords_top_left, target_size, aesthetic_score, negative_aesthetic_score)
    908 # 8. Prepare added time ids & embeddings
    909 add_text_embeds = pooled_prompt_embeds
--> 910 add_time_ids, add_neg_time_ids = self._get_add_time_ids(
    911     original_size,
    912     crops_coords_top_left,
    913     target_size,
    914     aesthetic_score,
    915     negative_aesthetic_score,
    916     dtype=prompt_embeds.dtype,
    917 )
    918 add_time_ids = add_time_ids.repeat(batch_size * num_images_per_prompt, 1)
    920 if do_classifier_free_guidance:

File c:\Users\Mark\anaconda3\envs\auto_content_creator\lib\site-packages\diffusers\pipelines\stable_diffusion_xl\pipeline_stable_diffusion_xl_img2img.py:613, in StableDiffusionXLImg2ImgPipeline._get_add_time_ids(self, original_size, crops_coords_top_left, target_size, aesthetic_score, negative_aesthetic_score, dtype)
    607 expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
    609 if (
    610     expected_add_embed_dim > passed_add_embed_dim
    611     and (expected_add_embed_dim - passed_add_embed_dim) == self.unet.config.addition_time_embed_dim
    612 ):
--> 613     raise ValueError(
    614         f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. Please make sure to enable `requires_aesthetics_score` with `pipe.register_to_config(requires_aesthetics_score=True)` to make sure `aesthetic_score` {aesthetic_score} and `negative_aesthetic_score` {negative_aesthetic_score} is correctly used by the model."
    615     )
    616 elif (
    617     expected_add_embed_dim < passed_add_embed_dim
    618     and (passed_add_embed_dim - expected_add_embed_dim) == self.unet.config.addition_time_embed_dim
    619 ):
    620     raise ValueError(
    621         f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. Please make sure to disable `requires_aesthetics_score` with `pipe.register_to_config(requires_aesthetics_score=False)` to make sure `target_size` {target_size} is correctly used by the model."
    622     )

ValueError: Model expects an added time embedding vector of length 2816, but a vector of 2560 was created. Please make sure to enable `requires_aesthetics_score` with `pipe.register_to_config(requires_aesthetics_score=True)` to make sure `aesthetic_score` 6.0 and `negative_aesthetic_score` 2.5 is correctly used by the model.

My code is:

from diffusers import StableDiffusionXLPipeline, DiffusionPipeline
import torch
import os

# Base SDXL pipeline: produces the initial (high-noise) part of the image.
base = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
base.to("cuda")

# Refiner pipeline: share ONLY the second text encoder and the VAE with the
# base pipeline. Unpacking `**base.components` here would also hand over the
# base UNet, replacing the refiner's own UNet. The base UNet expects a
# 2816-wide added-embedding vector, while the refiner pipeline (which uses
# the aesthetic score) builds a 2560-wide one; that mismatch raises
# "Model expects an added time embedding vector of length 2816, but a vector
# of 2560 was created".
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    # Load the refiner weights in the same half-precision variant as the
    # shared components so every module agrees on dtype.
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
refiner.to("cuda")
# Kept from the original script. NOTE(review): the refiner checkpoint's own
# config presumably already enables this, making the call redundant but
# harmless -- confirm against the checkpoint's model_index.json.
refiner.register_to_config(requires_aesthetics_score=True)

# Number of inference steps passed to both pipelines.
n_steps = 40
# Point in the denoising schedule where the base pipeline stops and the
# refiner pipeline takes over.
high_noise_frac = 0.8


def text_to_image(prompt):
    """Generate an image from *prompt* using the base and refiner pipelines.

    The base pipeline denoises up to ``high_noise_frac`` and returns latents
    (``output_type="latent"``) at 640x1536. The refiner pipeline then picks
    up from that same point and finishes the image.

    Args:
        prompt: Text prompt passed to both pipelines.

    Returns:
        The first image produced by the refiner pipeline.
    """
    base_latents = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=high_noise_frac,
        output_type="latent",
        height=640,
        width=1536,
    ).images
    refiner_image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        # The refiner must *start* where the base stopped. Passing
        # `denoising_end` here (as the base call does) would not resume the
        # schedule from the hand-off point.
        denoising_start=high_noise_frac,
        image=base_latents,
    ).images[0]
    return refiner_image


img = text_to_image("cartoon of colorful monsters frolocking in a dark spooky graveyard with tombstones and graves behind a castle")
0

There are 0 best solutions below