MLX_SWITCH macros to templates (#2320)

2025-12-16 01:49:05 +08:00 · 2025-07-01 01:33:44 -07:00
parent 33bf1a244b
commit 3d5e17e507
27 changed files with 693 additions and 692 deletions
--- a/mlx/backend/cuda/reduce/all_reduce.cu
+++ b/mlx/backend/cuda/reduce/all_reduce.cu
@@ -111,10 +111,11 @@ void all_reduce(
    encoder.add_temporary(intermediate);
    encoder.set_output_array(intermediate);
    encoder.launch_kernel([&](cudaStream_t stream) {
-      MLX_SWITCH_ALL_TYPES(dt, CTYPE, {
-        MLX_SWITCH_REDUCE_OPS(reduce_type, OP, {
-          using T = cuda_type_t<CTYPE>;
-          using U = cu::ReduceResult<OP, T>::type;
+      dispatch_all_types(dt, [&](auto type_tag) {
+        dispatch_reduce_ops(reduce_type, [&](auto reduce_type_tag) {
+          using OP = MLX_GET_TYPE(reduce_type_tag);
+          using T = cuda_type_t<MLX_GET_TYPE(type_tag)>;
+          using U = typename cu::ReduceResult<OP, T>::type;
          auto kernel = cu::all_reduce<T, U, OP, N_READS>;
          kernel<<<blocks, threads, 0, stream>>>(
              static_cast<T*>(indata),
@@ -135,10 +136,11 @@ void all_reduce(

  encoder.set_output_array(out);
  encoder.launch_kernel([&](cudaStream_t stream) {
-    MLX_SWITCH_ALL_TYPES(dt, CTYPE, {
-      MLX_SWITCH_REDUCE_OPS(reduce_type, OP, {
-        using T = cuda_type_t<CTYPE>;
-        using U = cu::ReduceResult<OP, T>::type;
+    dispatch_all_types(dt, [&](auto type_tag) {
+      dispatch_reduce_ops(reduce_type, [&](auto reduce_type_tag) {
+        using OP = MLX_GET_TYPE(reduce_type_tag);
+        using T = cuda_type_t<MLX_GET_TYPE(type_tag)>;
+        using U = typename cu::ReduceResult<OP, T>::type;
        auto kernel = cu::all_reduce<T, U, OP, N_READS>;
        kernel<<<blocks, threads, 0, stream>>>(
            static_cast<T*>(indata), out.data<U>(), block_step, insize);