From 6a62a8bca4bbda25347e9f2c605bc4550e435e94 Mon Sep 17 00:00:00 2001
From: Anchen
Date: Fri, 22 Dec 2023 18:03:20 +1100
Subject: [PATCH] chore: remove hardcoded rope_scaling_factor

Read the linear RoPE scaling factor from the model arguments instead of
hardcoding 4.0, so checkpoints configured with a different scaling factor
load correctly.
---
 llms/deepseek-coder/deepseek-coder.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llms/deepseek-coder/deepseek-coder.py b/llms/deepseek-coder/deepseek-coder.py
index de52eba3..a331ac57 100644
--- a/llms/deepseek-coder/deepseek-coder.py
+++ b/llms/deepseek-coder/deepseek-coder.py
@@ -105,7 +105,9 @@ class Attention(nn.Module):
             args.num_attention_heads * self.head_dim, args.hidden_size, bias=False
         )
         self.rope = LinearScalingRoPE(
-            self.head_dim, rope_scaling_factor=4.0, base=args.rope_theta
+            self.head_dim,
+            rope_scaling_factor=args.rope_scaling_factor,
+            base=args.rope_theta,
         )
 
     def __call__(
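
Context (not part of the patch): the diff assumes the model arguments expose a
rope_scaling_factor attribute. Below is a minimal sketch of what that side of the
change might look like, assuming a Hugging Face-style config dict. Only the
rope_scaling_factor and rope_theta names are taken from the diff; the ModelArgs
shape, the from_config helper, and the 1.0 default are illustrative assumptions.

from dataclasses import dataclass

@dataclass
class ModelArgs:
    hidden_size: int
    num_attention_heads: int
    rope_theta: float = 10000.0
    # Assumed default of 1.0, i.e. no position interpolation.
    rope_scaling_factor: float = 1.0

    @classmethod
    def from_config(cls, config: dict) -> "ModelArgs":
        # Hugging Face configs typically store linear RoPE scaling as
        # {"rope_scaling": {"type": "linear", "factor": 4.0}} (or null when
        # unused); flatten that into the field the Attention module reads.
        rope_scaling = config.get("rope_scaling") or {}
        return cls(
            hidden_size=config["hidden_size"],
            num_attention_heads=config["num_attention_heads"],
            rope_theta=config.get("rope_theta", 10000.0),
            rope_scaling_factor=rope_scaling.get("factor", 1.0),
        )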
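
Likewise, a brief sketch of the linear-scaling ("position interpolation")
technique that the LinearScalingRoPE constructor call configures, to show what
the factor controls. This is a hypothetical illustration, not the repository's
implementation.

class LinearScalingRoPE:
    # Hypothetical sketch: linear RoPE scaling divides each position index by
    # the scaling factor before computing the standard rotary angles.
    def __init__(self, dims: int, rope_scaling_factor: float = 1.0,
                 base: float = 10000.0):
        self.dims = dims
        self.rope_scaling_factor = rope_scaling_factor
        self.base = base

    def angles(self, position: int) -> list[float]:
        # Standard RoPE uses angle position * base**(-2i/dims) for the i-th
        # dimension pair; scaling the position stretches the usable context.
        pos = position / self.rope_scaling_factor
        return [pos / (self.base ** (2 * i / self.dims))
                for i in range(self.dims // 2)]

With a factor of 4.0, positions 0..16383 are mapped into the 0..4095 range the
model was trained on, which is why the factor must match the checkpoint's
configuration rather than being fixed in the source.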