mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 09:21:16 +08:00
Fix quantization of all 0s (#1028)
This commit is contained in:
parent
d0dbfe0b97
commit
ec8578d41a
@@ -3274,7 +3274,10 @@ std::tuple<array, array, array> quantize(
|
||||
reshape(w, {w.shape(0), w.shape(1) / group_size, group_size}, s);
|
||||
array w_max = max(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
|
||||
array w_min = min(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
|
||||
-array delta = divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s);
|
||||
+array delta = maximum(
|
||||
+divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s),
|
||||
+array(1e-7, w.dtype()),
|
||||
+s);
|
||||
array scales = squeeze(delta, -1, s);
|
||||
array biases = squeeze(w_min, -1, s);
|
||||
|
||||
|
@@ -18,6 +18,14 @@ class TestQuantized(mlx_tests.MLXTestCase):
|
||||
eps = 1e-6
|
||||
self.assertTrue((errors <= (scales[..., None] + eps)).all())
|
||||
|
||||
+# test quantize/dequantize 0s
|
||||
+a = mx.zeros((256, 512))
|
||||
+for gs in [32, 64, 128]:
|
||||
+for b in [2, 4, 8]:
|
||||
+w_q, scales, biases = mx.quantize(a, gs, b)
|
||||
+a_hat = mx.dequantize(w_q, scales, biases, gs, b)
|
||||
+self.assertTrue(mx.all(a_hat == 0))
|
||||
|
||||
def test_qmm(self):
|
||||
key = mx.random.key(0)
|
||||
k1, k2 = mx.random.split(key)
|
||||
|
Loading…
Reference in New Issue
Block a user