Mirror of https://github.com/ml-explore/mlx-examples.git, synced 2025-08-30 02:53:41 +08:00
moved the weight squeeze to map_unet_weights, style check
This commit is contained in:
parent 47b0685c79
commit 0e026e6a77
@@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 from safetensors import safe_open as safetensor_open
 
 import mlx.core as mx
-from mlx.utils import tree_unflatten, tree_flatten
+from mlx.utils import tree_unflatten
 
 from .clip import CLIPTextModel
 from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig
@@ -31,7 +31,7 @@ _MODELS = {
         "tokenizer_vocab": "tokenizer/vocab.json",
         "tokenizer_merges": "tokenizer/merges.txt",
     },
     "nitrosocke/Ghibli-Diffusion": {
         "unet_config": "unet/config.json",
         "unet": "unet/diffusion_pytorch_model.safetensors",
         "text_encoder_config": "text_encoder/config.json",
@@ -41,7 +41,7 @@ _MODELS = {
         "diffusion_config": "scheduler/scheduler_config.json",
         "tokenizer_vocab": "tokenizer/vocab.json",
         "tokenizer_merges": "tokenizer/merges.txt",
-    }
+    },
 }
 
 
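For context: each entry in _MODELS maps a logical name to a repo-relative path, which the loaders resolve through hf_hub_download (visible in the first hunk's header line). A minimal sketch of resolving one of the entries above; the call is illustrative and not part of this diff:

from huggingface_hub import hf_hub_download

repo = "nitrosocke/Ghibli-Diffusion"
# Download (or reuse from the local cache) one registry entry as a file path
unet_file = hf_hub_download(repo, "unet/diffusion_pytorch_model.safetensors")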
@@ -87,6 +87,10 @@ def map_unet_weights(key, value):
     if "conv_shortcut.weight" in key:
         value = value.squeeze()
 
+    # Transform the weights from 1x1 convs to linear
+    if len(value.shape) == 4 and ("proj_in" in key or "proj_out" in key):
+        value = value.squeeze()
+
     if len(value.shape) == 4:
         value = value.transpose(0, 2, 3, 1)
 
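The added branch relies on a standard fact: a 1x1 convolution is a per-pixel linear map, so its (C_out, C_in, 1, 1) weight can be squeezed to (C_out, C_in) and used directly as a linear layer's weight. A self-contained numpy sketch of that equivalence (illustrative, not from the commit):

import numpy as np

c_in, c_out, n_pixels = 8, 16, 32
x = np.random.randn(n_pixels, c_in).astype(np.float32)     # pixels as rows
w = np.random.randn(c_out, c_in, 1, 1).astype(np.float32)  # 1x1 conv weight

y_conv = np.einsum("pc,ocij->po", x, w)  # the 1x1 convolution, pixel by pixel
y_linear = x @ w.squeeze().T             # same result from the squeezed weight
assert np.allclose(y_conv, y_linear, atol=1e-5)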
@@ -165,23 +169,10 @@ def _flatten(params):
     return [(k, v) for p in params for (k, v) in p]
 
 
-def _match_shapes(model, weights):
-    #check whether the safetensor weights have the same shape as the model, if not reshape them
-    weight_shapes = {x[0]:x[1].shape for x in weights if isinstance(x[1], mx.array)}
-    arrays_model_shapes = {x[0]:x[1].shape for x in tree_flatten(model) if isinstance(x[1], mx.array)}
-    mismatched_keys = [k for k in weight_shapes if weight_shapes[k]!= arrays_model_shapes.get(k, weight_shapes[k])]
-    weights_dict = dict(weights)
-    for k in mismatched_keys:
-        weights_dict[k] = weights_dict[k].reshape(arrays_model_shapes[k])
-    weights = list(weights_dict.items())
-    return weights
-
-
 def _load_safetensor_weights(mapper, model, weight_file, float16: bool = False):
     dtype = np.float16 if float16 else np.float32
     with safetensor_open(weight_file, framework="numpy") as f:
         weights = _flatten([mapper(k, f.get_tensor(k).astype(dtype)) for k in f.keys()])
-    weights = _match_shapes(model, weights)
     model.update(tree_unflatten(weights))
 
 
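With the squeeze moved into map_unet_weights, every (key, value) pair leaving the mapper already matches the model's parameter shapes, so the _match_shapes pass above becomes redundant and loading reduces to tree_unflatten plus Module.update. A minimal sketch of that remaining path, using a stand-in module rather than the real UNet:

import mlx.core as mx
import mlx.nn as nn
from mlx.utils import tree_unflatten

model = nn.Linear(4, 4)
# Flat (dotted-key, array) pairs whose shapes already match the model
weights = [("weight", mx.zeros((4, 4))), ("bias", mx.zeros((4,)))]
model.update(tree_unflatten(weights))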
@@ -208,7 +199,9 @@ def load_unet(key: str = _DEFAULT_MODEL, float16: bool = False):
         out_channels=config["out_channels"],
         block_out_channels=config["block_out_channels"],
         layers_per_block=[config["layers_per_block"]] * n_blocks,
-        num_attention_heads=[config["attention_head_dim"]] * n_blocks if isinstance(config["attention_head_dim"], int) else config["attention_head_dim"],
+        num_attention_heads=[config["attention_head_dim"]] * n_blocks
+        if isinstance(config["attention_head_dim"], int)
+        else config["attention_head_dim"],
         cross_attention_dim=[config["cross_attention_dim"]] * n_blocks,
         norm_num_groups=config["norm_num_groups"],
     )
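This last hunk is the style part of the commit: the ternary is reflowed across three lines with no change in behavior. A quick illustration with hypothetical config values shows what the expression yields in both branches:

n_blocks = 4
for attention_head_dim in (8, [5, 10, 20, 20]):  # hypothetical values
    num_attention_heads = (
        [attention_head_dim] * n_blocks
        if isinstance(attention_head_dim, int)
        else attention_head_dim
    )
    print(num_attention_heads)  # [8, 8, 8, 8], then [5, 10, 20, 20]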