Merge branch 'main' into nccl_backend

Anastasiia Filippova
2025-08-20 13:37:18 +02:00
109 changed files with 3214 additions and 1178 deletions


@@ -28,30 +28,45 @@ class ArrayAt {
 public:
  ArrayAt(mx::array x) : x_(std::move(x)) {}
  ArrayAt& set_indices(nb::object indices) {
    initialized_ = true;
    indices_ = indices;
    return *this;
  }
  void check_initialized() {
    if (!initialized_) {
      throw std::invalid_argument(
          "Must give indices to array.at (e.g. `x.at[0].add(4)`).");
    }
  }
  mx::array add(const ScalarOrArray& v) {
    check_initialized();
    return mlx_add_item(x_, indices_, v);
  }
  mx::array subtract(const ScalarOrArray& v) {
    check_initialized();
    return mlx_subtract_item(x_, indices_, v);
  }
  mx::array multiply(const ScalarOrArray& v) {
    check_initialized();
    return mlx_multiply_item(x_, indices_, v);
  }
  mx::array divide(const ScalarOrArray& v) {
    check_initialized();
    return mlx_divide_item(x_, indices_, v);
  }
  mx::array maximum(const ScalarOrArray& v) {
    check_initialized();
    return mlx_maximum_item(x_, indices_, v);
  }
  mx::array minimum(const ScalarOrArray& v) {
    check_initialized();
    return mlx_minimum_item(x_, indices_, v);
  }

 private:
  mx::array x_;
  bool initialized_{false};
  nb::object indices_;
};
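
The std::invalid_argument thrown by check_initialized surfaces in Python as a ValueError. A minimal sketch of the new behavior (mirroring the test further down):

import mlx.core as mx

a = mx.array(1)

# The update helpers now require indices; calling them directly raises.
try:
    a.at.add(1)
except ValueError as e:
    print(e)  # Must give indices to array.at (e.g. `x.at[0].add(4)`).

# With indices set via __getitem__, the update is applied functionally:
b = a.at[None].add(1)
print(b.item())  # 2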


@@ -15,15 +15,6 @@ cuda_skip = {
     "TestOps.test_hadamard_grad_vmap",
     # Convolutions NYI
     "TestConv.test_1d_conv_with_2d",
-    "TestConv.test_conv_1d_groups_flipped",
-    "TestConv.test_conv_general_flip_grad",
-    "TestConv.test_conv_groups_grad",
-    "TestConv.test_torch_conv_2D",
-    "TestConv.test_torch_conv_depthwise",
-    "TestConv.test_torch_conv_general",
-    "TestConvTranspose.test_torch_conv_transpose_1D_grad",
-    "TestConvTranspose.test_torch_conv_transpose_2D_grad",
-    "TestConvTranspose.test_torch_conv_transpose_3D_grad",
     # FFTs NYI
     "TestFFT.test_fft",
     "TestFFT.test_fft_big_powers_of_two",


@@ -1365,6 +1365,9 @@ class TestArray(mlx_tests.MLXTestCase):
    def test_array_at(self):
        a = mx.array(1)
        with self.assertRaises(ValueError):
            a.at.add(1)
        a = a.at[None].add(1)
        self.assertEqual(a.item(), 2)
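
The at helper exposes the full set of updates defined on ArrayAt above (add, subtract, multiply, divide, maximum, minimum), each returning a new array. A small illustrative sketch (values are made up):

import mlx.core as mx

x = mx.zeros((3,))
x = x.at[0].add(4)       # each call is a functional update
x = x.at[1].maximum(2)
x = x.at[2].subtract(1)
print(x)  # values: 4, 2, -1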


@@ -639,12 +639,12 @@ class TestFast(mlx_tests.MLXTestCase):
             ],
             grid=(6, 1, 1),
             threadgroup=(2, 1, 1),
-            output_shapes=[(2, 2), (3, 2)],
+            output_shapes=[(3, 2), (3, 2)],
             output_dtypes=[mx.float32, mx.int32],
             stream=mx.gpu,
         )
-        self.assertTrue(mx.allclose(out[0], mx.full((2, 2), 14.0484)))
+        self.assertTrue(mx.allclose(out[0], mx.full((3, 2), 14.0484)))
         self.assertTrue(mx.allclose(out[1], mx.full((3, 2), -2, dtype=mx.int32)))

     @unittest.skipIf(not mx.metal.is_available(), "Metal is not available")
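
For context, a hedged sketch of the mx.fast.metal_kernel call shape this test exercises; the kernel name, source, and input are assumptions, not the test's actual kernel, and it only runs where Metal is available:

import mlx.core as mx

# Illustrative two-output kernel; output shapes and dtypes are declared
# at call time, which is what the fixed test asserts against.
source = """
    uint i = thread_position_in_grid.x;
    out1[i] = a[i] + 1.0f;
    out2[i] = -2;
"""
kernel = mx.fast.metal_kernel(
    name="two_outputs",
    input_names=["a"],
    output_names=["out1", "out2"],
    source=source,
)
a = mx.ones((6,))
out = kernel(
    inputs=[a],
    grid=(6, 1, 1),
    threadgroup=(2, 1, 1),
    output_shapes=[(3, 2), (3, 2)],  # element counts must cover the kernel's writes
    output_dtypes=[mx.float32, mx.int32],
    stream=mx.gpu,
)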