Add support for qwen2moe (#640)

* add sparsemoe block and update decoder logic

* update file name to match HF

* update name

* Code formatting

* update gates calculation

* add support for Qwen2MoE.

* fix pytest

* code formatting and fix missing comma in utils

* Remove decoder sparse step.

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>

* remove gate layer quantisation exclusion

* remove unused argument

---------

Co-authored-by: bozheng-hit <dsoul0621@gmail.com>
Author: Prince Canuma
Date: 2024-04-02 20:33:29 +02:00
Committed by: GitHub
Parent: 78c431dc25
Commit: d661440dbb
3 changed files with 282 additions and 0 deletions
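
The bullets above mention adding a sparse MoE block and updating the gate calculation. Below is a rough, hedged sketch of how a Qwen2-MoE-style block routes tokens in MLX: a softmax router picks the top-k experts per token, and a sigmoid-gated shared expert is always added. This is illustrative only, not the committed qwen2_moe code; the class and parameter names (SparseMoeSketch, num_experts_per_tok) and the stand-in nn.Linear experts are assumptions.

import mlx.core as mx
import mlx.nn as nn


class SparseMoeSketch(nn.Module):
    def __init__(self, dim: int, num_experts: int, num_experts_per_tok: int):
        super().__init__()
        self.top_k = num_experts_per_tok
        # Router ("mlp.gate") and per-token shared-expert gate ("mlp.shared_expert_gate").
        self.gate = nn.Linear(dim, num_experts, bias=False)
        self.shared_expert_gate = nn.Linear(dim, 1, bias=False)
        # Stand-in experts; the real block uses gated MLPs, not single linear layers.
        self.experts = [nn.Linear(dim, dim) for _ in range(num_experts)]
        self.shared_expert = nn.Linear(dim, dim)

    def __call__(self, x: mx.array) -> mx.array:
        # x: (num_tokens, dim). Router probabilities over experts.
        scores = mx.softmax(self.gate(x), axis=-1)
        # Top-k expert indices per token and their renormalized weights.
        inds = mx.argpartition(-scores, kth=self.top_k - 1, axis=-1)[..., : self.top_k]
        weights = mx.take_along_axis(scores, inds, axis=-1)
        weights = weights / weights.sum(axis=-1, keepdims=True)

        # Naive per-token dispatch; real implementations batch this.
        out = []
        for t in range(x.shape[0]):
            yt = mx.zeros_like(x[t])
            for w, e in zip(weights[t].tolist(), inds[t].tolist()):
                yt = yt + w * self.experts[e](x[t])
            out.append(yt)
        y = mx.stack(out)

        # The shared expert is always applied, scaled by a learned sigmoid gate.
        return y + mx.sigmoid(self.shared_expert_gate(x)) * self.shared_expert(x)

The diff shown below only touches the LoRA target keys; the full block lives in the model file this commit adds.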


@@ -70,6 +70,7 @@ def linear_to_lora_layers(
"mixtral",
"stablelm",
"qwen2",
"qwen2_moe",
"gemma",
"starcoder2",
"cohere",
@@ -77,6 +78,9 @@ def linear_to_lora_layers(
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
         if model.model_type == "mixtral":
             keys.add("block_sparse_moe.gate")
+        if model.model_type == "qwen2_moe":
+            keys.add("mlp.gate")
+            keys.add("mlp.shared_expert_gate")
     elif model.model_type == "olmo":
         keys = set(["att_proj"])
     elif model.model_type == "phi-msft":
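
For context, a hedged illustration of what the key set above is used for: linear_to_lora_layers matches these dotted-name suffixes against the model's submodules to decide which linear layers receive LoRA adapters. The helper below is illustrative, not the mlx-lm implementation; named_modules is assumed to yield ("model.layers.0.mlp.gate", module)-style pairs, as in typical neural network libraries.

def lora_target_names(named_modules, keys):
    # Keep submodules whose dotted path ends with one of the configured keys.
    return [name for name, _ in named_modules if any(name.endswith(k) for k in keys)]


# Key set produced by the qwen2_moe branch added in this diff:
qwen2_moe_keys = {
    "self_attn.q_proj",
    "self_attn.v_proj",
    "mlp.gate",
    "mlp.shared_expert_gate",
}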