chore: remove hardcoded rope_scaling_factor

This commit is contained in:
Anchen 2023-12-22 18:03:20 +11:00 committed by Awni Hannun
parent e17e07002a
commit 6a62a8bca4

View File

@@ -105,7 +105,9 @@ class Attention(nn.Module):
args.num_attention_heads * self.head_dim, args.hidden_size, bias=False
)
self.rope = LinearScalingRoPE(
-            self.head_dim, rope_scaling_factor=4.0, base=args.rope_theta
+            self.head_dim,
+            rope_scaling_factor=args.rope_scaling_factor,
+            base=args.rope_theta,
)
def __call__(