Add Phi-3.5-MoE (#946)

* add phimoe

* add phimoe to tuner

* add switch_mlp

* fix SuScaled args

* nits

---------

Co-authored-by: Awni Hannun <awni@apple.com>
Author: Prince Canuma
Date: 2024-08-24 15:52:33 +02:00
Committed by: GitHub
parent 6731254e76
commit b5e18ef1e3
3 changed files with 225 additions and 2 deletions


@@ -96,6 +96,7 @@ def linear_to_lora_layers(
         "stablelm",
         "qwen2",
         "qwen2_moe",
+        "phimoe",
         "gemma",
         "gemma2",
         "starcoder2",
@@ -104,7 +105,7 @@ def linear_to_lora_layers(
         "deepseek",
     ]:
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
-        if model.model_type == "mixtral":
+        if model.model_type in ["mixtral", "phimoe"]:
             keys.add("block_sparse_moe.gate")
         if model.model_type == "qwen2_moe":
             keys.add("mlp.gate")