From 58591a1b4175265e7b316de5b4d0365be658c6b6 Mon Sep 17 00:00:00 2001 From: Awni Hannun Date: Thu, 22 Aug 2024 10:41:21 -0700 Subject: [PATCH] fine tune deepseek (#932) --- llms/mlx_lm/tuner/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/llms/mlx_lm/tuner/utils.py b/llms/mlx_lm/tuner/utils.py index c6af9730..9f18c2c0 100644 --- a/llms/mlx_lm/tuner/utils.py +++ b/llms/mlx_lm/tuner/utils.py @@ -128,6 +128,16 @@ def linear_to_lora_layers( keys = set(["norm_attn_norm.attn.Wqkv", "ffn.router.layer"]) elif model.model_type == "internlm2": keys = set(["attention.wqkv", "attention.wo"]) + elif model.model_type == "deepseek_v2": + keys = set( + [ + "self_attn.q_proj", + "self_attn.q_a_proj", + "self_attn.q_b_proj", + "self_attn.kv_a_proj_with_mqa", + "self_attn.kv_b_proj", + ] + ) else: raise ValueError(f"Lora does not support {model.model_type}")