From bd63a3e5eeec1481af3a5b55b4c7870dadac0b5a Mon Sep 17 00:00:00 2001
From: Anchen
Date: Sat, 23 Dec 2023 17:17:14 +1100
Subject: [PATCH] chore: clean up the rope scaling factor param in create cos
sin theta
---
llms/deepseek-coder/deepseek_coder.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llms/deepseek-coder/deepseek_coder.py b/llms/deepseek-coder/deepseek_coder.py
index 3600f30e..aa926f6b 100644
--- a/llms/deepseek-coder/deepseek_coder.py
+++ b/llms/deepseek-coder/deepseek_coder.py
@@ -52,11 +52,11 @@ class LinearScalingRoPE(nn.RoPE):
x = mx.reshape(x, (-1, shape[-2], shape[-1]))
N = x.shape[1] + offset
costheta, sintheta = LinearScalingRoPE.create_cos_sin_theta(
- self.rope_scaling_factor,
N,
self.dims,
offset=offset,
base=self.base,
+ rope_scaling_factor=self.rope_scaling_factor,
dtype=x.dtype,
)
@@ -66,11 +66,11 @@ class LinearScalingRoPE(nn.RoPE):
@staticmethod
def create_cos_sin_theta(
- rope_scaling_factor: float,
N: int,
D: int,
offset: int = 0,
base: float = 10000,
+ rope_scaling_factor: float = 1.0,
dtype=mx.float32,
):
D = D // 2