3 and 6 bit quantization (#1613)

* Support 3 and 6 bit quantization
This commit is contained in:
Alex Barron
2024-11-22 10:22:13 -08:00
committed by GitHub
parent 0c5eea226b
commit c79f6a4a8c
12 changed files with 633 additions and 419 deletions

View File

@@ -549,18 +549,6 @@ class TestFast(mlx_tests.MLXTestCase):
)(x)
self.assertTrue(mx.allclose(vmap_out, vmap_fast_out))
def test_affine_quantize(self):
mx.random.seed(7)
x = mx.random.uniform(shape=(4, 1024))
for bits in (2, 4, 8):
for group_size in (32, 64, 128):
with self.subTest(bits=bits, group_size=group_size):
w, scales, biases = mx.quantize(x, bits=bits, group_size=group_size)
w_p = mx.fast.affine_quantize(
x, scales, biases, bits=bits, group_size=group_size
)
self.assertTrue(mx.allclose(w, w_p))
@unittest.skipIf(not mx.metal.is_available(), "Metal is not available")
def test_custom_kernel_basic(self):
mx.random.seed(7)

View File

@@ -11,7 +11,7 @@ class TestQuantized(mlx_tests.MLXTestCase):
def test_quantize_dequantize(self):
w = mx.random.normal(shape=(128, 512))
for gs in [32, 64, 128]:
for b in [2, 4, 8]:
for b in [2, 3, 4, 6, 8]:
with self.subTest(gs=gs, b=b):
w_q, scales, biases = mx.quantize(w, group_size=gs, bits=b)
w_hat = mx.dequantize(w_q, scales, biases, gs, b)
@@ -22,7 +22,7 @@ class TestQuantized(mlx_tests.MLXTestCase):
# test quantize/dequantize 0s
a = mx.zeros((256, 512))
for gs in [32, 64, 128]:
for b in [2, 4, 8]:
for b in [2, 3, 4, 6, 8]:
w_q, scales, biases = mx.quantize(a, gs, b)
a_hat = mx.dequantize(w_q, scales, biases, gs, b)
self.assertTrue(mx.all(a_hat == 0))
@@ -116,7 +116,7 @@ class TestQuantized(mlx_tests.MLXTestCase):
k1, k2 = mx.random.split(key)
tests = product(
[128, 64, 32], # group_size
[2, 4, 8], # bits
[2, 3, 4, 6, 8], # bits
[512, 1024, 67], # M
[64, 128, 512, 1024], # N
[0, 1, 3, 8], # B
@@ -143,7 +143,7 @@ class TestQuantized(mlx_tests.MLXTestCase):
k1, k2 = mx.random.split(key)
tests = product(
[128, 64, 32], # group_size
[2, 4, 8], # bits
[2, 3, 4, 6, 8], # bits
[512, 1024], # M
[512, 1024, 67], # N
[0, 1, 3, 8], # B