Switch to fast RMS/LN Norm (#603)

* use nn.RMSNorm, use sdpa, cleanup
* bump mlx versions
* minor update
* use fast layer norm
* version bump
* update requirement for whisper
* update requirement for gguf
2025-12-15 09:48:54 +08:00 · 2024-03-23 07:13:51 -07:00
parent fbed720d6f
commit b8a348c1b8
44 changed files with 144 additions and 1155 deletions
--- a/llms/mlx_lm/models/gemma.py
+++ b/llms/mlx_lm/models/gemma.py
@@ -23,13 +23,6 @@ class ModelArgs(BaseModelArgs):
    rope_traditional: bool = False


-@partial(mx.compile, shapeless=True)
-def rms_norm(x, weight, eps):
-    x = x.astype(mx.float32)
-    x = x * mx.rsqrt(x.square().mean(-1, keepdims=True) + eps)
-    return (1.0 + weight) * x.astype(weight.dtype)
-
-
 class RMSNorm(nn.Module):
    def __init__(self, dims: int, eps: float = 1e-5):
        super().__init__()
@@ -37,7 +30,7 @@ class RMSNorm(nn.Module):
        self.eps = eps

    def __call__(self, x):
-        return rms_norm(x, self.weight, self.eps)
+        return mx.fast.rms_norm(x, 1.0 + self.weight, self.eps)


 class Attention(nn.Module):