Add Phi-3.5-MoE (#946)

* add phimoe

* add phimoe to tuner

* add switch_mlp

* fix SuScaled args

* nits

---------

Co-authored-by: Awni Hannun <awni@apple.com>
Author: Prince Canuma
Date: 2024-08-24 15:52:33 +02:00
Committed by: GitHub
parent 6731254e76
commit b5e18ef1e3
3 changed files with 225 additions and 2 deletions


@@ -96,6 +96,7 @@ def linear_to_lora_layers(
         "stablelm",
         "qwen2",
         "qwen2_moe",
+        "phimoe",
         "gemma",
         "gemma2",
         "starcoder2",
@@ -104,7 +105,7 @@ def linear_to_lora_layers(
         "deepseek",
     ]:
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
-        if model.model_type == "mixtral":
+        if model.model_type in ["mixtral", "phimoe"]:
             keys.add("block_sparse_moe.gate")
         if model.model_type == "qwen2_moe":
             keys.add("mlp.gate")