mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-01 12:49:44 +08:00
Fused Affine Quantize/Dequantize ops (#1282)
* Add fast affine dequantize
* add full quantize kernel
* fused kernel with scale/bias computation
* fix docstring
* fix no jit error
* fix test
* test fix
* reduce fast api to only affine_quantize
This commit is contained in:
@@ -439,6 +439,18 @@ class TestFast(mlx_tests.MLXTestCase):
|
||||
)(x)
|
||||
self.assertTrue(mx.allclose(vmap_out, vmap_fast_out))
|
||||
|
||||
def test_affine_quantize(self):
    """Compare ``mx.fast.affine_quantize`` with ``mx.quantize``.

    For every (bits, group_size) combination, the scales and biases
    returned by ``mx.quantize`` are fed to ``mx.fast.affine_quantize``
    together with the same input, and the two quantized outputs are
    checked with ``mx.allclose``. Each combination runs in its own
    ``subTest`` so a failure reports the offending parameters.
    """
    mx.random.seed(7)
    x = mx.random.uniform(shape=(4, 1024))

    # Flattened form of "for bits: for group_size:" -- the iteration order
    # (group_size varying fastest) is the same as the nested loops.
    configs = [(b, g) for b in (2, 4, 8) for g in (32, 64, 128)]

    for bits, group_size in configs:
        with self.subTest(bits=bits, group_size=group_size):
            quant_kwargs = {"bits": bits, "group_size": group_size}

            # Reference path: also yields the scales/biases reused below.
            expected, scales, biases = mx.quantize(x, **quant_kwargs)

            # Path under test: quantize with the precomputed scales/biases.
            actual = mx.fast.affine_quantize(x, scales, biases, **quant_kwargs)

            # NOTE(review): if the quantized output is an integer-packed
            # array, an exact comparison may be the stricter check here --
            # confirm the dtype before tightening.
            outputs_match = mx.allclose(expected, actual)
            self.assertTrue(outputs_match)
|
||||
|
||||
|
||||
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
Reference in New Issue
Block a user