mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 03:01:34 +08:00
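Add OLMoE (`olmoe`) to the model types supported by LoRA training (`linear_to_lora_layers`), including its `mlp.gate` router layer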
This commit is contained in:
parent
ef8ec7a27a
commit
bbde6ea4bc
@ -98,6 +98,7 @@ def linear_to_lora_layers(
|
|||||||
"minicpm",
|
"minicpm",
|
||||||
"deepseek",
|
"deepseek",
|
||||||
"olmo2",
|
"olmo2",
|
||||||
|
"olmoe",
|
||||||
"internlm3",
|
"internlm3",
|
||||||
]:
|
]:
|
||||||
keys = set(["self_attn.q_proj", "self_attn.v_proj"])
|
keys = set(["self_attn.q_proj", "self_attn.v_proj"])
|
||||||
@ -106,6 +107,8 @@ def linear_to_lora_layers(
|
|||||||
if model.model_type == "qwen2_moe":
|
if model.model_type == "qwen2_moe":
|
||||||
keys.add("mlp.gate")
|
keys.add("mlp.gate")
|
||||||
keys.add("mlp.shared_expert_gate")
|
keys.add("mlp.shared_expert_gate")
|
||||||
|
if model.model_type == "olmoe":
|
||||||
|
keys.add("mlp.gate")
|
||||||
|
|
||||||
elif model.model_type == "gpt_bigcode":
|
elif model.model_type == "gpt_bigcode":
|
||||||
keys = set(["attn.c_attn"])
|
keys = set(["attn.c_attn"])
|
||||||
|
Loading…
Reference in New Issue
Block a user