Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-09-17 17:28:15 +08:00)
adding OLMoE architecture (#1321)
* initial commit
* update ACKNOWLEDGMENTS.md
* add olmoe to training
* clean up
* faster generation
* remove sanitize method
* more clean ups
* add SwitchGLU
* clean up
* a little faster, and add norm_topk_prob
* formatting
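The commit message mentions `norm_topk_prob`, a common MoE router option that renormalizes the gate scores of the selected experts so they sum to 1 per token. As a rough illustration only (the function name and shapes below are assumptions for this sketch, not the actual mlx-lm OLMoE code), top-k routing with renormalization can be written in MLX like this:

```python
import mlx.core as mx


def top_k_routing(gate_logits: mx.array, k: int, norm_topk_prob: bool = True):
    # Illustrative sketch of top-k expert routing; not the actual
    # mlx-lm OLMoE implementation.
    # Routing probabilities over all experts, per token.
    probs = mx.softmax(gate_logits, axis=-1)
    # Indices of the k highest-probability experts for each token.
    indices = mx.argpartition(-probs, kth=k - 1, axis=-1)[..., :k]
    scores = mx.take_along_axis(probs, indices, axis=-1)
    if norm_topk_prob:
        # Renormalize so the k selected weights sum to 1 per token.
        scores = scores / mx.sum(scores, axis=-1, keepdims=True)
    return scores, indices
```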
@@ -98,6 +98,7 @@ def linear_to_lora_layers(
         "minicpm",
         "deepseek",
         "olmo2",
+        "olmoe",
         "internlm3",
     ]:
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
@@ -106,6 +107,8 @@ def linear_to_lora_layers(
         if model.model_type == "qwen2_moe":
             keys.add("mlp.gate")
             keys.add("mlp.shared_expert_gate")
+        if model.model_type == "olmoe":
+            keys.add("mlp.gate")

     elif model.model_type == "gpt_bigcode":
         keys = set(["attn.c_attn"])
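With this change, `olmoe` models pick up LoRA adapters on `self_attn.q_proj`, `self_attn.v_proj`, and additionally the MoE router `mlp.gate`. For a quick smoke test of the new architecture, the standard mlx-lm loading path applies; the Hugging Face model id below is an assumption, and any MLX-compatible OLMoE checkpoint would work:

```python
from mlx_lm import load, generate

# Assumed OLMoE checkpoint id; substitute any MLX-compatible OLMoE model.
model, tokenizer = load("allenai/OLMoE-1B-7B-0924")
print(generate(model, tokenizer, prompt="Hello", max_tokens=32))
```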