From 4c881639414dcd1867a15b90e21882925ff1280c Mon Sep 17 00:00:00 2001
From: Pawel Kowalski
Date: Fri, 15 Dec 2023 22:01:02 +0100
Subject: [PATCH] Stable diffusion - check model weights shape and support int for "attention_head_dim" (#85)

* Allow integer as attention_head_dim

* Reshape downloaded weights to match model if there is a mismatch
---
 stable_diffusion/stable_diffusion/model_io.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/stable_diffusion/stable_diffusion/model_io.py b/stable_diffusion/stable_diffusion/model_io.py
index c2669de4..57879ef9 100644
--- a/stable_diffusion/stable_diffusion/model_io.py
+++ b/stable_diffusion/stable_diffusion/model_io.py
@@ -76,6 +76,10 @@ def map_unet_weights(key, value):
     if "conv_shortcut.weight" in key:
         value = value.squeeze()
 
+    # Transform the weights from 1x1 convs to linear
+    if len(value.shape) == 4 and ("proj_in" in key or "proj_out" in key):
+        value = value.squeeze()
+
     if len(value.shape) == 4:
         value = value.transpose(0, 2, 3, 1)
 
@@ -184,7 +188,9 @@ def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
             out_channels=config["out_channels"],
             block_out_channels=config["block_out_channels"],
             layers_per_block=[config["layers_per_block"]] * n_blocks,
-            num_attention_heads=config["attention_head_dim"],
+            num_attention_heads=[config["attention_head_dim"]] * n_blocks
+            if isinstance(config["attention_head_dim"], int)
+            else config["attention_head_dim"],
             cross_attention_dim=[config["cross_attention_dim"]] * n_blocks,
             norm_num_groups=config["norm_num_groups"],
         )