From 6a62a8bca4bbda25347e9f2c605bc4550e435e94 Mon Sep 17 00:00:00 2001 From: Anchen Date: Fri, 22 Dec 2023 18:03:20 +1100 Subject: [PATCH] chore: remove hardcoded rope_scaling_factor --- llms/deepseek-coder/deepseek-coder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llms/deepseek-coder/deepseek-coder.py b/llms/deepseek-coder/deepseek-coder.py index de52eba3..a331ac57 100644 --- a/llms/deepseek-coder/deepseek-coder.py +++ b/llms/deepseek-coder/deepseek-coder.py @@ -105,7 +105,9 @@ class Attention(nn.Module): args.num_attention_heads * self.head_dim, args.hidden_size, bias=False ) self.rope = LinearScalingRoPE( - self.head_dim, rope_scaling_factor=4.0, base=args.rope_theta + self.head_dim, + rope_scaling_factor=args.rope_scaling_factor, + base=args.rope_theta, ) def __call__(