Fused Affine Quantize/Dequantize ops (#1282)

* Add fast affine dequantize
* add full quantize kernel
* fused kernel with scale/bias computation
* fix docstring
* fix no jit error
* fix test
* test fix
* reduce fast api to only affine_quantize
2025-12-14 17:12:49 +08:00 · 2024-07-29 15:11:38 -07:00
parent aa1d6cadad
commit c52d1600f0
11 changed files with 655 additions and 400 deletions
--- a/python/tests/test_fast.py
+++ b/python/tests/test_fast.py
@@ -439,6 +439,18 @@ class TestFast(mlx_tests.MLXTestCase):
        )(x)
        self.assertTrue(mx.allclose(vmap_out, vmap_fast_out))

+    def test_affine_quantize(self):  # checks mx.fast.affine_quantize against mx.quantize
+        mx.random.seed(7)  # fixed seed keeps the random input reproducible
+        x = mx.random.uniform(shape=(4, 1024))
+        for bits in (2, 4, 8):
+            for group_size in (32, 64, 128):
+                with self.subTest(bits=bits, group_size=group_size):  # one sub-test per pair
+                    w, scales, biases = mx.quantize(x, bits=bits, group_size=group_size)
+                    w_p = mx.fast.affine_quantize(
+                        x, scales, biases, bits=bits, group_size=group_size
+                    )  # re-quantize x using the scales/biases mx.quantize returned
+                    self.assertTrue(mx.allclose(w, w_p))  # both paths must agree
+

 if __name__ == "__main__":
    unittest.main()