fine tune deepseek (#932)

This commit is contained in:
Awni Hannun 2024-08-22 10:41:21 -07:00 committed by GitHub
parent 0164d2058b
commit 58591a1b41
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -128,6 +128,16 @@ def linear_to_lora_layers(
keys = set(["norm_attn_norm.attn.Wqkv", "ffn.router.layer"]) keys = set(["norm_attn_norm.attn.Wqkv", "ffn.router.layer"])
elif model.model_type == "internlm2": elif model.model_type == "internlm2":
keys = set(["attention.wqkv", "attention.wo"]) keys = set(["attention.wqkv", "attention.wo"])
elif model.model_type == "deepseek_v2":
keys = set(
[
"self_attn.q_proj",
"self_attn.q_a_proj",
"self_attn.q_b_proj",
"self_attn.kv_a_proj_with_mqa",
"self_attn.kv_b_proj",
]
)
else: else:
raise ValueError(f"Lora does not support {model.model_type}") raise ValueError(f"Lora does not support {model.model_type}")