From bbde6ea4bc8229048de7de6b82f89fe486953f34 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Tue, 4 Mar 2025 21:08:55 +0100
Subject: [PATCH] adding olmoe to training

---
 llms/mlx_lm/tuner/utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llms/mlx_lm/tuner/utils.py b/llms/mlx_lm/tuner/utils.py
index f5df11e3..cc7c6c20 100644
--- a/llms/mlx_lm/tuner/utils.py
+++ b/llms/mlx_lm/tuner/utils.py
@@ -98,6 +98,7 @@ def linear_to_lora_layers(
         "minicpm",
         "deepseek",
         "olmo2",
+        "olmoe",
         "internlm3",
     ]:
         keys = set(["self_attn.q_proj", "self_attn.v_proj"])
@@ -106,6 +107,8 @@
         if model.model_type == "qwen2_moe":
            keys.add("mlp.gate")
            keys.add("mlp.shared_expert_gate")
+        if model.model_type == "olmoe":
+            keys.add("mlp.gate")

    elif model.model_type == "gpt_bigcode":
        keys = set(["attn.c_attn"])
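
Note (not part of the patch): a minimal usage sketch of the patched helper on an OLMoE checkpoint. The checkpoint id, LoRA parameters, and layer count below are illustrative assumptions, not values from this change; the point is that for model_type "olmoe" the helper now targets self_attn.q_proj, self_attn.v_proj, and the MoE router gate mlp.gate, mirroring the existing qwen2_moe handling just above.

# Illustrative sketch only: the checkpoint id and LoRA parameters are assumed,
# not taken from this patch.
from mlx_lm import load
from mlx_lm.tuner.utils import linear_to_lora_layers

model, tokenizer = load("allenai/OLMoE-1B-7B-0924")  # assumed OLMoE checkpoint

model.freeze()  # only the injected LoRA adapters should remain trainable
lora_config = {"rank": 8, "dropout": 0.0, "scale": 20.0}  # assumed values

# With this patch, for olmoe this wraps self_attn.q_proj, self_attn.v_proj,
# and mlp.gate (the expert router) in the last 8 transformer blocks with
# LoRA layers.
linear_to_lora_layers(model, 8, lora_config)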