Fix quantization of all 0s (#1028)

2025-10-05 14:28:09 +08:00 · 2024-04-24 00:40:42 -07:00
parent d0dbfe0b97
commit ec8578d41a
2 changed files with 12 additions and 1 deletion
--- a/mlx/ops.cpp
+++ b/mlx/ops.cpp
@@ -3274,7 +3274,10 @@ std::tuple<array, array, array> quantize(
      reshape(w, {w.shape(0), w.shape(1) / group_size, group_size}, s);
  array w_max = max(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
  array w_min = min(packed_w, /* axis= */ -1, /* keepdims= */ true, s);
-  array delta = divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s);
+  array delta = maximum(
+      divide(subtract(w_max, w_min, s), array(n_bins, w.dtype()), s),
+      array(1e-7, w.dtype()),
+      s);
  array scales = squeeze(delta, -1, s);
  array biases = squeeze(w_min, -1, s);

--- a/python/tests/test_quantized.py
+++ b/python/tests/test_quantized.py
@@ -18,6 +18,14 @@ class TestQuantized(mlx_tests.MLXTestCase):
                eps = 1e-6
                self.assertTrue((errors <= (scales[..., None] + eps)).all())

+        # test quantize/dequantize 0s
+        a = mx.zeros((256, 512))
+        for gs in [32, 64, 128]:
+            for b in [2, 4, 8]:
+                w_q, scales, biases = mx.quantize(a, gs, b)
+                a_hat = mx.dequantize(w_q, scales, biases, gs, b)
+                self.assertTrue(mx.all(a_hat == 0))
+
    def test_qmm(self):
        key = mx.random.key(0)
        k1, k2 = mx.random.split(key)