Lazy loading models for faster convert and merge (#462)

2025-12-16 02:08:55 +08:00 · 2024-02-20 13:36:55 -08:00
parent 8eee4399f4
commit dc4f2e0a6b
3 changed files with 41 additions and 13 deletions
--- a/llms/mlx_lm/convert.py
+++ b/llms/mlx_lm/convert.py
@@ -96,7 +96,7 @@ def convert(
 ):
    print("[INFO] Loading")
    model_path = get_model_path(hf_path)
-    model, config, tokenizer = fetch_from_hub(model_path)
+    model, config, tokenizer = fetch_from_hub(model_path, lazy=True)

    weights = dict(tree_flatten(model.parameters()))
    dtype = mx.float16 if quantize else getattr(mx, dtype)
@@ -110,7 +110,8 @@ def convert(
    if isinstance(mlx_path, str):
        mlx_path = Path(mlx_path)

-    save_weights(mlx_path, weights)
+    del model
+    save_weights(mlx_path, weights, donate_weights=True)

    py_files = glob.glob(str(model_path / "*.py"))
    for file in py_files: