import argparse
import glob
import shutil
from pathlib import Path

from mlx.utils import tree_flatten, tree_unflatten

from .gguf import convert_to_gguf
from .tuner.lora import LoRALinear
from .tuner.utils import apply_lora_layers, dequantize
from .utils import (
    fetch_from_hub,
    get_model_path,
    save_config,
    save_weights,
    upload_to_hub,
)

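# Command-line flags for fusing trained LoRA adapters into the base model,
# with optional de-quantization, GGUF export, and Hugging Face upload.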
def parse_arguments() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Fuse fine-tuned LoRA or QLoRA adapters into the base model."
    )
    parser.add_argument(
        "--model",
        default="mlx_model",
        help="The path to the local model directory or Hugging Face repo.",
    )
    parser.add_argument(
        "--save-path",
        default="lora_fused_model",
        help="The path to save the fused model.",
    )
    parser.add_argument(
        "--adapter-path",
        type=str,
        default="adapters",
        help="Path to the trained adapter weights and config.",
    )
    parser.add_argument(
        "--hf-path",
        type=str,
        default=None,
        help="Path to the original Hugging Face model. Required for upload if --model is a local directory.",
    )
    parser.add_argument(
        "--upload-repo",
        type=str,
        default=None,
        help="The Hugging Face repo to upload the model to.",
    )
    parser.add_argument(
        "--de-quantize",
        help="Generate a de-quantized model.",
        action="store_true",
    )
    parser.add_argument(
        "--export-gguf",
        help="Export model weights in GGUF format.",
        action="store_true",
    )
    parser.add_argument(
        "--gguf-path",
        type=str,
        default="ggml-model-f16.gguf",
        help="Path to save the exported GGUF format model weights. Default is ggml-model-f16.gguf.",
    )
    return parser.parse_args()


def main() -> None:
    print("Loading pretrained model")
    args = parse_arguments()
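
    # Resolve --model to a local directory (fetching from the Hugging Face
    # Hub when it is not a local path) and load the weights, config, and
    # tokenizer.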
    model_path = get_model_path(args.model)
    model, config, tokenizer = fetch_from_hub(model_path)
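
    # Freeze the base weights, then load the trained LoRA adapters on top.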
    model.freeze()
    model = apply_lora_layers(model, args.adapter_path)
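
    # Fuse: swap every LoRALinear for a plain Linear with the low-rank update
    # merged into its weights, so the saved model needs no adapter files.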
    fused_linears = [
        (n, m.to_linear())
        for n, m in model.named_modules()
        if isinstance(m, LoRALinear)
    ]

    model.update_modules(tree_unflatten(fused_linears))

    if args.de_quantize:
        print("De-quantizing model")
        model = dequantize(model)

    weights = dict(tree_flatten(model.parameters()))
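
    # Save everything needed to reload the fused model: the weights, any
    # custom Python modules shipped alongside the original model, and the
    # tokenizer files.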
    save_path = Path(args.save_path)
    save_weights(save_path, weights)

    py_files = glob.glob(str(model_path / "*.py"))
    for file in py_files:
        shutil.copy(file, save_path)

    tokenizer.save_pretrained(save_path)
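
    # A de-quantized model should no longer advertise quantization settings.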
    if args.de_quantize:
        config.pop("quantization", None)

    save_config(config, config_path=save_path / "config.json")
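
    # GGUF export only covers the architectures convert_to_gguf can map.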
    if args.export_gguf:
        model_type = config["model_type"]
        if model_type not in ["llama", "mixtral", "mistral"]:
            raise ValueError(
                f"Model type {model_type} not supported for GGUF conversion."
            )
        convert_to_gguf(model_path, weights, config, str(save_path / args.gguf_path))
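
    # Uploading requires the name of the original Hugging Face repo: reuse
    # --model when it is a repo id rather than a local path; otherwise the
    # user must supply --hf-path explicitly.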
    if args.upload_repo is not None:
        hf_path = args.hf_path or (
            args.model if not Path(args.model).exists() else None
        )
        if hf_path is None:
            raise ValueError(
                "Must provide original Hugging Face repo to upload local model."
            )
        upload_to_hub(args.save_path, args.upload_repo, hf_path)


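# Example invocation (module path assumed from the package-relative imports
# above; adjust to wherever this file lives in your package):
#
#   python -m mlx_lm.fuse --model mlx_model --adapter-path adapters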
if __name__ == "__main__":
    main()