From 58591a1b4175265e7b316de5b4d0365be658c6b6 Mon Sep 17 00:00:00 2001
From: Awni Hannun <awni@apple.com>
Date: Thu, 22 Aug 2024 10:41:21 -0700
Subject: [PATCH] fine tune deepseek (#932)

---
 llms/mlx_lm/tuner/utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llms/mlx_lm/tuner/utils.py b/llms/mlx_lm/tuner/utils.py
index c6af9730..9f18c2c0 100644
--- a/llms/mlx_lm/tuner/utils.py
+++ b/llms/mlx_lm/tuner/utils.py
@@ -128,6 +128,16 @@ def linear_to_lora_layers(
         keys = set(["norm_attn_norm.attn.Wqkv", "ffn.router.layer"])
     elif model.model_type == "internlm2":
         keys = set(["attention.wqkv", "attention.wo"])
+    elif model.model_type == "deepseek_v2":
+        keys = set(
+            [
+                "self_attn.q_proj",
+                "self_attn.q_a_proj",
+                "self_attn.q_b_proj",
+                "self_attn.kv_a_proj_with_mqa",
+                "self_attn.kv_b_proj",
+            ]
+        )
     else:
         raise ValueError(f"Lora does not support {model.model_type}")