mlx-examples/llama/convert.py

# Copyright © 2023 Apple Inc.
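"""Convert sharded Llama PyTorch checkpoints to a single MLX-compatible
weights.npz.

Reads every consolidated.*.pth shard found in --model_path, casts the
tensors to float16, re-joins weights that were split for model
parallelism, and saves the result as weights.npz in the same directory.

Usage (a sketch, assuming this file's location in mlx-examples):

    python convert.py --model_path /path/to/llama-model
"""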
import argparse
import collections
import glob
from pathlib import Path
import numpy as np
import torch
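
# Parameter groups that the Torch checkpoints shard across files:
# SHARD_FIRST weights are split along axis 0 (column-parallel layers),
# SHARD_SECOND along axis 1 (row-parallel layers and the token embedding).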
SHARD_FIRST = ["wv", "wq", "wk", "w1", "w3", "output"]
SHARD_SECOND = ["tok_embeddings", "wo", "w2"]
SHARD_WEIGHTS = set(SHARD_FIRST + SHARD_SECOND)


def shard_key(k):
    # The shard group is the second-to-last component of the parameter
    # name, e.g. "layers.0.attention.wq.weight" -> "wq".
    keys = k.split(".")
    if len(keys) < 2:
        return None
    return keys[-2]


def unshard(k, v):
    wn = shard_key(k)
    if wn not in SHARD_WEIGHTS:
        # Unsharded parameter (e.g. a norm weight): pass it through.
        return v
    elif wn in SHARD_FIRST:
        axis = 0
    elif wn in SHARD_SECOND:
        axis = 1
    else:
        raise ValueError("Invalid weight name")
    # Join the per-shard arrays, e.g. two (n, d) shards of "wq" become
    # one (2n, d) weight.
    return np.concatenate(v, axis=axis)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert Llama weights to MLX")
    parser.add_argument(
        "--model_path",
        help="Path to the Torch model. The MLX weights will also be saved there.",
    )
    args = parser.parse_args()

    model_path = Path(args.model_path)
    # Checkpoints may be split into model-parallel shards:
    # consolidated.00.pth, consolidated.01.pth, ... Sort the paths so
    # the shards are concatenated in order.
    torch_files = sorted(glob.glob(str(model_path / "consolidated.*.pth")))
    weights = collections.defaultdict(list)
    for wf in torch_files:
        state = torch.load(wf, map_location=torch.device("cpu"))
        for k, v in state.items():
            v = v.to(torch.float16).numpy()
            if shard_key(k) in SHARD_WEIGHTS:
                # Collect per-shard pieces; they are joined by unshard below.
                weights[k].append(v)
            else:
                weights[k] = v
    out_file = str(model_path / "weights.npz")
    for k, v in weights.items():
        weights[k] = unshard(k, v)
    np.savez(out_file, **weights)
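
# A quick sanity check of the output (illustrative, not part of the
# original script): the archive can be re-opened with NumPy.
#
#   >>> import numpy as np
#   >>> w = np.load("weights.npz")
#   >>> list(w.files)[:3]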