Switch to fast RMS/LN Norm (#603)

* use nn.RMSNorm, use sdpa, cleanup * bump mlx versions * minor update * use fast layer norm * version bump * update requirement for whisper * update requirement for gguf
2025-12-16 02:08:55 +08:00 · 2024-03-23 07:13:51 -07:00
parent fbed720d6f
commit b8a348c1b8
44 changed files with 144 additions and 1155 deletions
--- a/llms/tests/test_models.py
+++ b/llms/tests/test_models.py
@@ -21,7 +21,9 @@ class TestModels(unittest.TestCase):
            self.assertEqual(outputs.shape, (1, 2, vocab_size))
            self.assertEqual(outputs.dtype, t)

-            outputs, cache = model(mx.argmax(outputs[1, :], keepdims=True), cache=cache)
+            outputs, cache = model(
+                mx.argmax(outputs[0, -1:, :], keepdims=True), cache=cache
+            )
            self.assertEqual(outputs.shape, (1, 1, vocab_size))
            self.assertEqual(outputs.dtype, t)