fix quantized vjp for mxfp4 (#2555)
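In mxfp4 mode there is no bias tensor, so this change passes the biases into QuantizedMatmul::vjp/jvp and GatherQMM::vjp as std::optional<array> (primals[3] only for affine, std::nullopt otherwise), reads the gather indices from the end of primals, and throws std::invalid_argument instead of std::runtime_error when a gradient with respect to the scales is requested, which surfaces in Python as a ValueError. A minimal sketch of the resulting behavior, patterned on the test added at the bottom of this diff (the argnums call is illustrative and not part of the change):

import mlx.core as mx

x = mx.random.normal(shape=(2, 512))
w = mx.random.normal(shape=(512, 512))
# mxfp4 quantization returns packed weights and scales only, no biases
wq, s = mx.quantize(w, bits=4, group_size=32, mode="mxfp4")

def mm(s, x, wq):
    return mx.quantized_matmul(x, wq, s, bits=4, group_size=32, mode="mxfp4").sum()

# Gradients with respect to the activations are still defined
dx = mx.grad(mm, argnums=1)(s, x, wq)

# Gradients with respect to the scales are not defined for mxfp4 and now raise ValueError
try:
    ds = mx.grad(mm)(s, x, wq)
except ValueError as err:
    print(err)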
@@ -3246,7 +3246,8 @@ std::vector<array> QuantizedMatmul::vjp(
           cotangents[0],
           primals[1],
           primals[2],
-          primals[3],
+          mode_ == QuantizationMode::Affine ? std::optional<array>(primals[3])
+                                            : std::nullopt,
           !transpose_,
           group_size_,
           bits_,
@@ -3260,7 +3261,7 @@ std::vector<array> QuantizedMatmul::vjp(
             "[QuantizedMatmul::vjp] no gradient wrt the quantized weights.");
       } else {
         if (mode_ == QuantizationMode::Mxfp4) {
-          throw std::runtime_error(
+          throw std::invalid_argument(
              "[QuantizedMatmul::vjp] no gradient wrt scales with mxfp4 quantization.");
         }
         if (!dsb) {
@@ -3305,7 +3306,8 @@ std::vector<array> QuantizedMatmul::jvp(
       tangents[0],
       primals[1],
       primals[2],
-      primals[3],
+      mode_ == QuantizationMode::Affine ? std::optional<array>(primals[3])
+                                        : std::nullopt,
       transpose_,
       group_size_,
       bits_,
@@ -3346,9 +3348,11 @@ std::vector<array> GatherQMM::vjp(
   auto& x = primals[0];
   auto& w = primals[1];
   auto& scales = primals[2];
-  auto& biases = primals[3];
-  auto& lhs_indices = primals[4];
-  auto& rhs_indices = primals[5];
+  auto& lhs_indices = primals[primals.size() - 2];
+  auto& rhs_indices = primals[primals.size() - 1];
+  auto biases = (mode_ == QuantizationMode::Affine)
+      ? std::optional<array>(primals[3])
+      : std::nullopt;
 
   int M = cotan.shape(-2);
   int N = cotan.shape(-1);
@@ -3401,7 +3405,7 @@ std::vector<array> GatherQMM::vjp(
             "[GatherQMM::vjp] no gradient wrt the quantized weights.");
       } else {
         if (mode_ == QuantizationMode::Mxfp4) {
-          throw std::runtime_error(
+          throw std::invalid_argument(
              "[GatherQMM::vjp] no gradient wrt scales with mxfp4 quantization.");
         }
 
@@ -3432,7 +3436,7 @@ std::vector<array> GatherQMM::vjp(
           dequantize(
               w,
               ones_like(scales, stream()),
-              zeros_like(biases, stream()),
+              zeros_like(*biases, stream()),
               group_size_,
               bits_,
               quantization_mode_to_string(mode_),
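For contrast, affine quantization keeps per-group scales and biases, and the dsb path above still provides gradients for both; only mxfp4 rejects scale gradients. A small sketch, assuming the affine mx.quantize / mx.quantized_matmul signatures accept biases as shown (this example is not part of the diff):

import mlx.core as mx

x = mx.random.normal(shape=(2, 512))
w = mx.random.normal(shape=(512, 512))
# Affine mode (the default) returns packed weights, scales, and biases
wq, s, b = mx.quantize(w, bits=4, group_size=32)

def mm(s, b, x, wq):
    return mx.quantized_matmul(x, wq, s, b, bits=4, group_size=32).sum()

# Gradients with respect to scales and biases are defined in affine mode
ds, db = mx.grad(mm, argnums=(0, 1))(s, b, x, wq)
print(ds.shape, db.shape)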
@@ -842,6 +842,37 @@ class TestQuantized(mlx_tests.MLXTestCase):
                     num_ds = (out_up - out_down) / (2 * eps)
                     self.assertAlmostEqual(dparams[p][idx], num_ds, delta=2e-2)
 
+    def test_mxfp4_vjp_scales_throws(self):
+        mx.random.seed(0)
+        x = mx.random.normal(shape=(2, 512))
+        w = mx.random.normal(shape=(512, 512))
+        wq, s = mx.quantize(w, bits=4, group_size=32, mode="mxfp4")
+
+        def mm(s, x, wq):
+            return mx.quantized_matmul(
+                x, wq, s, bits=4, group_size=32, mode="mxfp4"
+            ).sum()
+
+        # Should raise
+        with self.assertRaises(ValueError):
+            ds = mx.grad(mm)(s, x, wq)
+
+        rhs_indices = mx.array(0)
+        with self.assertRaises(ValueError):
+
+            def gmm(s, x, wq):
+                return mx.gather_qmm(
+                    x,
+                    wq,
+                    s,
+                    rhs_indices=rhs_indices,
+                    bits=4,
+                    group_size=32,
+                    mode="mxfp4",
+                ).sum()
+
+            ds = mx.grad(gmm)(s, x, wq)
+
 
 if __name__ == "__main__":
     mlx_tests.MLXTestRunner()