MLX_SWITCH macros to templates (#2320)

2025-12-16 01:49:05 +08:00 · 2025-07-01 01:33:44 -07:00
parent 33bf1a244b
commit 3d5e17e507
27 changed files with 693 additions and 692 deletions
--- a/mlx/backend/cuda/copy/copy_contiguous.cu
+++ b/mlx/backend/cuda/copy/copy_contiguous.cu
@@ -36,19 +36,23 @@ void copy_contiguous(
    int64_t in_offset,
    int64_t out_offset) {
  encoder.launch_kernel([&](cudaStream_t stream) {
-    MLX_SWITCH_COPY_TYPES(in, out, InType, OutType, {
-      MLX_SWITCH_BOOL(out.data_size() > UINT32_MAX, LARGE, {
-        using IdxT = std::conditional_t<LARGE, int64_t, uint32_t>;
-        auto kernel = cu::copy_s<InType, OutType, IdxT>;
-        if (ctype == CopyType::Vector) {
-          kernel = cu::copy_v<InType, OutType, IdxT>;
-        }
-        auto [num_blocks, block_dims] = get_launch_args(
-            kernel, out.data_size(), out.shape(), out.strides(), LARGE);
-        kernel<<<num_blocks, block_dims, 0, stream>>>(
-            in.data<InType>() + in_offset,
-            out.data<OutType>() + out_offset,
-            out.data_size());
+    dispatch_all_types(in.dtype(), [&](auto in_type_tag) {
+      dispatch_all_types(out.dtype(), [&](auto out_type_tag) {
+        dispatch_bool(out.data_size() > INT32_MAX, [&](auto large) {
+          using InType = cuda_type_t<MLX_GET_TYPE(in_type_tag)>;
+          using OutType = cuda_type_t<MLX_GET_TYPE(out_type_tag)>;
+          using IdxT = std::conditional_t<large(), int64_t, uint32_t>;
+          auto kernel = cu::copy_s<InType, OutType, IdxT>;
+          if (ctype == CopyType::Vector) {
+            kernel = cu::copy_v<InType, OutType, IdxT>;
+          }
+          auto [num_blocks, block_dims] = get_launch_args(
+              kernel, out.data_size(), out.shape(), out.strides(), large());
+          kernel<<<num_blocks, block_dims, 0, stream>>>(
+              in.data<InType>() + in_offset,
+              out.data<OutType>() + out_offset,
+              out.data_size());
+        });
      });
    });
  });