# mlx-examples/lora/convert.py

# Copyright © 2023 Apple Inc.

import argparse
import copy
import json
import shutil
from pathlib import Path

import mlx.core as mx
import mlx.nn as nn
import numpy as np
import torch
from mlx.utils import tree_flatten, tree_map, tree_unflatten

from lora import Model, ModelArgs


def quantize(weights, config, args):
    """Build the model from ``weights`` and quantize its linear layers.

    Args:
        weights: Flat mapping of parameter name to array-like value; every
            value is converted with ``mx.array`` before being loaded.
        config: Model configuration; unpacked into ``ModelArgs`` and read
            for ``"vocab_size"``. The dict itself is not modified.
        args: Object exposing ``q_group_size`` and ``q_bits``.

    Returns:
        A ``(quantized_weights, quantized_config)`` tuple: the flattened
        parameters of the quantized model as a dict, and a deep copy of
        ``config`` with an added ``"quantization"`` entry.
    """
    group_size = args.q_group_size
    bits = args.q_bits

    # Work on a copy so the caller's config is left as it was.
    quantized_config = copy.deepcopy(config)

    def _is_quantizable(module):
        # Only nn.Linear layers are candidates, and those whose first weight
        # dimension equals the vocabulary size are skipped (presumably the
        # output projection -- confirm against the model definition).
        if not isinstance(module, nn.Linear):
            return False
        return module.weight.shape[0] != config["vocab_size"]

    # Instantiate the model and load the converted weights into it.
    model = Model(ModelArgs(**config))
    weights = tree_map(mx.array, weights)
    flat_items = list(weights.items())
    model.update(tree_unflatten(flat_items))

    # Swap the selected linear layers for their quantized counterparts.
    nn.QuantizedLinear.quantize_module(
        model,
        group_size,
        bits,
        linear_class_predicate=_is_quantizable,
    )

    # Record the quantization settings next to the rest of the config.
    quantized_config["quantization"] = {"group_size": group_size, "bits": bits}

    flat_parameters = tree_flatten(model.parameters())
    return dict(flat_parameters), quantized_config


if __name__ == "__main__":
    # Command-line entry point: read a torch checkpoint directory
    # (tokenizer.model, consolidated.00.pth, params.json) and write the
    # tokenizer, weights.npz and config.json into the MLX output directory,
    # optionally quantizing the model first.
    parser = argparse.ArgumentParser(
        description="Convert Mistral or Llama models to MLX.",
    )
    parser.add_argument(
        "--torch-path",
        type=str,
        default="mistral-7B-v0.1/",
        help="Path to the torch model directory",
    )
    parser.add_argument(
        "--mlx-path",
        type=str,
        default="mlx_model/",
        help="The directory to store the mlx model",
    )
    parser.add_argument(
        "-q",
        "--quantize",
        help="Generate a quantized model.",
        action="store_true",
    )
    parser.add_argument(
        "--q-group-size",
        help="Group size for quantization.",
        type=int,
        default=64,
    )
    parser.add_argument(
        "--q-bits",
        help="Bits per weight for quantization.",
        type=int,
        default=4,
    )
    args = parser.parse_args()

    torch_path = Path(args.torch_path)
    mlx_path = Path(args.mlx_path)
    mlx_path.mkdir(parents=True, exist_ok=True)

    # Copy the tokenizer
    tokenizer_path = torch_path / "tokenizer.model"
    if not tokenizer_path.exists():
        # A missing tokenizer is a failure: SystemExit with a message writes
        # it to stderr and exits with status 1, so callers and shell scripts
        # can detect that the conversion did not happen.
        raise SystemExit(
            f"Make sure there is a file tokenizer.model in {args.torch_path}"
        )
    shutil.copyfile(
        str(tokenizer_path),
        str(mlx_path / "tokenizer.model"),
    )

    # Load the torch model weights to numpy.
    # NOTE(security): torch.load unpickles the checkpoint; only convert
    # checkpoints obtained from a trusted source.
    # map_location="cpu" keeps the load independent of the device the
    # checkpoint was saved from; .numpy() below requires CPU tensors.
    weights = torch.load(
        str(torch_path / "consolidated.00.pth"),
        map_location="cpu",
    )
    # Convert in place (values only, the key set is unchanged) rather than
    # building a second dict, so the source tensors are not all kept alive
    # alongside their converted copies.
    for k, v in weights.items():
        weights[k] = v.to(torch.float16).numpy()

    # Standardize the params
    with open(torch_path / "params.json", "r") as f:
        config = json.load(f)

    # Drop the keys this script treats as unused.
    for k in ("multiple_of", "sliding_window"):
        config.pop(k, None)

    # Fill in values that some params.json files omit, deriving them from
    # the other config entries or from the weight shapes.
    n_heads = config["n_heads"]
    if "n_kv_heads" not in config:
        config["n_kv_heads"] = n_heads
    if "head_dim" not in config:
        config["head_dim"] = config["dim"] // n_heads
    if "hidden_dim" not in config:
        config["hidden_dim"] = weights["layers.0.feed_forward.w1.weight"].shape[0]
    # A missing or negative vocab_size is replaced by the row count of the
    # output weight matrix.
    if config.get("vocab_size", -1) < 0:
        config["vocab_size"] = weights["output.weight"].shape[0]

    if args.quantize:
        print("[INFO] Quantizing")
        weights, config = quantize(weights, config, args)

    np.savez(str(mlx_path / "weights.npz"), **weights)

    with open(mlx_path / "config.json", "w") as outfile:
        json.dump(config, outfile, indent=4)
add copyright in source 2023-12-01 03:08:53 +08:00			`# Copyright © 2023 Apple Inc.`

lora 2023-11-30 06:14:11 +08:00			`import argparse`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`import copy`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`import json`
Add llms subdir + update README (#145) * add llms subdir + update README * nits * use same pre-commit as mlx * update readmes a bit * format 2023-12-21 02:22:25 +08:00			`import shutil`
			`from pathlib import Path`
lora 2023-11-30 06:14:11 +08:00
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`import mlx.core as mx`
			`import mlx.nn as nn`
Add llms subdir + update README (#145) * add llms subdir + update README * nits * use same pre-commit as mlx * update readmes a bit * format 2023-12-21 02:22:25 +08:00			`import numpy as np`
			`import torch`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`from mlx.utils import tree_flatten, tree_map, tree_unflatten`

			`from lora import Model, ModelArgs`


			`def quantize(weights, config, args):`
			`quantized_config = copy.deepcopy(config)`

			`# Load the model:`
			`model = Model(ModelArgs(**config))`
			`weights = tree_map(mx.array, weights)`
			`model.update(tree_unflatten(list(weights.items())))`

			`# Quantize the model:`
			`nn.QuantizedLinear.quantize_module(`
			`model,`
			`args.q_group_size,`
			`args.q_bits,`
			`linear_class_predicate=lambda m: isinstance(m, nn.Linear)`
			`and m.weight.shape[0] != config["vocab_size"],`
			`)`

			`# Update the config:`
			`quantized_config["quantization"] = {`
			`"group_size": args.q_group_size,`
			`"bits": args.q_bits,`
			`}`
			`quantized_weights = dict(tree_flatten(model.parameters()))`

			`return quantized_weights, quantized_config`

lora 2023-11-30 06:14:11 +08:00
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`if __name__ == "__main__":`
			`parser = argparse.ArgumentParser(`
			`description="Convert Mistral or Llama models to MLX.",`
			`)`
			`parser.add_argument(`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`"--torch-path",`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`type=str,`
			`default="mistral-7B-v0.1/",`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`help="Path to the torch model directory",`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`)`
			`parser.add_argument(`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`"--mlx-path",`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`type=str,`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`default="mlx_model/",`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`help="The directory to store the mlx model",`
			`)`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`parser.add_argument(`
			`"-q",`
			`"--quantize",`
			`help="Generate a quantized model.",`
			`action="store_true",`
			`)`
			`parser.add_argument(`
			`"--q-group-size",`
			`help="Group size for quantization.",`
			`type=int,`
			`default=64,`
			`)`
			`parser.add_argument(`
			`"--q-bits",`
			`help="Bits per weight for quantization.",`
			`type=int,`
			`default=4,`
			`)`
			`args = parser.parse_args()`

generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`args = parser.parse_args()`
lora 2023-11-30 06:14:11 +08:00
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`torch_path = Path(args.torch_path)`
			`mlx_path = Path(args.mlx_path)`
			`mlx_path.mkdir(parents=True, exist_ok=True)`
lora 2023-11-30 06:14:11 +08:00
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`# Copy the tokenizer`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00			`tokenizer_path = torch_path / "tokenizer.model"`
			`if not tokenizer_path.exists():`
Fix typo in lora convert.py (#245) 2024-01-07 19:30:30 +08:00			`print(f"Make sure there is a file tokenizer.model in {args.torch_path}")`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00			`exit(0)`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`shutil.copyfile(`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00			`str(tokenizer_path),`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`str(mlx_path / "tokenizer.model"),`
fix: Unsupported BFloat16 Data Type Issue with MPS Backend 2023-12-08 16:19:35 +08:00			`)`
lora 2023-11-30 06:14:11 +08:00
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`# Load the torch model weights to numpy:`
			`weights = torch.load(str(torch_path / "consolidated.00.pth"))`
			`for k, v in weights.items():`
			`weights[k] = v.to(torch.float16).numpy()`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`# Standardize the params`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`with open(torch_path / "params.json", "r") as f:`
			`config = json.loads(f.read())`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`unused = ["multiple_of", "sliding_window"]`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00			`for k in unused:`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`config.pop(k, None)`
fix conversion 2023-12-11 06:35:39 +08:00			`n_heads = config["n_heads"]`
generalize lora finetuning for llama and mistral 2023-12-10 06:13:55 +08:00			`if "n_kv_heads" not in config:`
			`config["n_kv_heads"] = n_heads`
			`if "head_dim" not in config:`
			`config["head_dim"] = config["dim"] // n_heads`
			`if "hidden_dim" not in config:`
Qlora (#219) qlora 2024-01-05 13:05:59 +08:00			`config["hidden_dim"] = weights["layers.0.feed_forward.w1.weight"].shape[0]`
			`if config.get("vocab_size", -1) < 0:`
			`config["vocab_size"] = weights["output.weight"].shape[0]`

			`if args.quantize:`
			`print("[INFO] Quantizing")`
			`weights, config = quantize(weights, config, args)`

			`np.savez(str(mlx_path / "weights.npz"), **weights)`

			`with open(mlx_path / "config.json", "w") as outfile:`
fix use for llama 2 from meta (#144) 2023-12-19 11:33:17 +08:00			`json.dump(config, outfile, indent=4)`