adding olmoe to training

Goekdeniz-Guelmez 2025-03-04 21:08:55 +01:00
parent ef8ec7a27a
commit bbde6ea4bc


@@ -98,6 +98,7 @@ def linear_to_lora_layers(
         "minicpm",
         "deepseek",
         "olmo2",
+        "olmoe",
         "internlm3",
     ]:
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
@@ -106,6 +107,8 @@ def linear_to_lora_layers(
         if model.model_type == "qwen2_moe":
             keys.add("mlp.gate")
             keys.add("mlp.shared_expert_gate")
+        if model.model_type == "olmoe":
+            keys.add("mlp.gate")
     elif model.model_type == "gpt_bigcode":
         keys = set(["attn.c_attn"])