From bd63a3e5eeec1481af3a5b55b4c7870dadac0b5a Mon Sep 17 00:00:00 2001 From: Anchen Date: Sat, 23 Dec 2023 17:17:14 +1100 Subject: [PATCH] chore: clean up the rope scaling factor param in create cos sin theta --- llms/deepseek-coder/deepseek_coder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llms/deepseek-coder/deepseek_coder.py b/llms/deepseek-coder/deepseek_coder.py index 3600f30e..aa926f6b 100644 --- a/llms/deepseek-coder/deepseek_coder.py +++ b/llms/deepseek-coder/deepseek_coder.py @@ -52,11 +52,11 @@ class LinearScalingRoPE(nn.RoPE): x = mx.reshape(x, (-1, shape[-2], shape[-1])) N = x.shape[1] + offset costheta, sintheta = LinearScalingRoPE.create_cos_sin_theta( - self.rope_scaling_factor, N, self.dims, offset=offset, base=self.base, + rope_scaling_factor=self.rope_scaling_factor, dtype=x.dtype, ) @@ -66,11 +66,11 @@ class LinearScalingRoPE(nn.RoPE): @staticmethod def create_cos_sin_theta( - rope_scaling_factor: float, N: int, D: int, offset: int = 0, base: float = 10000, + rope_scaling_factor: float = 1.0, dtype=mx.float32, ): D = D // 2