From ef9495fb8f457803b1c2ea6714c1c52ccabe2b1b Mon Sep 17 00:00:00 2001 From: Cheng Date: Thu, 12 Jun 2025 23:39:06 +0000 Subject: [PATCH] Rename kernels/ to device/ --- mlx/backend/cuda/binary.cu | 4 ++-- mlx/backend/cuda/compiled.cpp | 6 +++--- mlx/backend/cuda/copy/copy.cuh | 2 +- mlx/backend/cuda/{kernels => device}/arange.cuh | 0 mlx/backend/cuda/{kernels => device}/binary_ops.cuh | 2 +- mlx/backend/cuda/{kernels => device}/cast_op.cuh | 0 mlx/backend/cuda/{kernels => device}/config.h | 0 mlx/backend/cuda/{kernels => device}/cucomplex_math.cuh | 0 mlx/backend/cuda/{kernels => device}/fp16_math.cuh | 0 mlx/backend/cuda/{kernels => device}/unary_ops.cuh | 4 ++-- mlx/backend/cuda/{kernels => device}/utils.cuh | 2 +- mlx/backend/cuda/jit_module.h | 2 +- mlx/backend/cuda/kernel_utils.cuh | 4 ++-- mlx/backend/cuda/logsumexp.cu | 2 +- mlx/backend/cuda/primitives.cu | 4 ++-- mlx/backend/cuda/reduce/col_reduce.cu | 2 +- mlx/backend/cuda/reduce/reduce.cuh | 2 +- mlx/backend/cuda/reduce/reduce_ops.cuh | 2 +- mlx/backend/cuda/reduce/row_reduce.cu | 2 +- mlx/backend/cuda/reduce/segmented_reduce.cu | 2 +- mlx/backend/cuda/softmax.cu | 4 ++-- mlx/backend/cuda/unary.cu | 4 ++-- 22 files changed, 25 insertions(+), 25 deletions(-) rename mlx/backend/cuda/{kernels => device}/arange.cuh (100%) rename mlx/backend/cuda/{kernels => device}/binary_ops.cuh (99%) rename mlx/backend/cuda/{kernels => device}/cast_op.cuh (100%) rename mlx/backend/cuda/{kernels => device}/config.h (100%) rename mlx/backend/cuda/{kernels => device}/cucomplex_math.cuh (100%) rename mlx/backend/cuda/{kernels => device}/fp16_math.cuh (100%) rename mlx/backend/cuda/{kernels => device}/unary_ops.cuh (98%) rename mlx/backend/cuda/{kernels => device}/utils.cuh (99%) diff --git a/mlx/backend/cuda/binary.cu b/mlx/backend/cuda/binary.cu index 360772998..47efc44d2 100644 --- a/mlx/backend/cuda/binary.cu +++ b/mlx/backend/cuda/binary.cu @@ -2,9 +2,9 @@ #include "mlx/backend/common/binary.h" #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/binary_ops.cuh" +#include "mlx/backend/cuda/device/cucomplex_math.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/binary_ops.cuh" -#include "mlx/backend/cuda/kernels/cucomplex_math.cuh" #include "mlx/dtype_utils.h" #include "mlx/primitives.h" diff --git a/mlx/backend/cuda/compiled.cpp b/mlx/backend/cuda/compiled.cpp index de004b482..a6b8223e0 100644 --- a/mlx/backend/cuda/compiled.cpp +++ b/mlx/backend/cuda/compiled.cpp @@ -129,9 +129,9 @@ struct FusedKernelBuilder { } // namespace cu constexpr const char* g_jit_includes = R"( -#include "mlx/backend/cuda/kernels/binary_ops.cuh" -#include "mlx/backend/cuda/kernels/unary_ops.cuh" -#include "mlx/backend/cuda/kernels/utils.cuh" +#include "mlx/backend/cuda/device/binary_ops.cuh" +#include "mlx/backend/cuda/device/unary_ops.cuh" +#include "mlx/backend/cuda/device/utils.cuh" #include diff --git a/mlx/backend/cuda/copy/copy.cuh b/mlx/backend/cuda/copy/copy.cuh index dd1d09d30..0c1eff774 100644 --- a/mlx/backend/cuda/copy/copy.cuh +++ b/mlx/backend/cuda/copy/copy.cuh @@ -3,8 +3,8 @@ #pragma once #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/cast_op.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/cast_op.cuh" #include "mlx/backend/gpu/copy.h" #include "mlx/dtype_utils.h" diff --git a/mlx/backend/cuda/kernels/arange.cuh b/mlx/backend/cuda/device/arange.cuh similarity index 100% rename from mlx/backend/cuda/kernels/arange.cuh rename to mlx/backend/cuda/device/arange.cuh diff --git a/mlx/backend/cuda/kernels/binary_ops.cuh b/mlx/backend/cuda/device/binary_ops.cuh similarity index 99% rename from mlx/backend/cuda/kernels/binary_ops.cuh rename to mlx/backend/cuda/device/binary_ops.cuh index 3bc30eb02..4779a6f33 100644 --- a/mlx/backend/cuda/kernels/binary_ops.cuh +++ b/mlx/backend/cuda/device/binary_ops.cuh @@ -1,6 +1,6 @@ // Copyright © 2025 Apple Inc. -#include "mlx/backend/cuda/kernels/fp16_math.cuh" +#include "mlx/backend/cuda/device/fp16_math.cuh" #include #include diff --git a/mlx/backend/cuda/kernels/cast_op.cuh b/mlx/backend/cuda/device/cast_op.cuh similarity index 100% rename from mlx/backend/cuda/kernels/cast_op.cuh rename to mlx/backend/cuda/device/cast_op.cuh diff --git a/mlx/backend/cuda/kernels/config.h b/mlx/backend/cuda/device/config.h similarity index 100% rename from mlx/backend/cuda/kernels/config.h rename to mlx/backend/cuda/device/config.h diff --git a/mlx/backend/cuda/kernels/cucomplex_math.cuh b/mlx/backend/cuda/device/cucomplex_math.cuh similarity index 100% rename from mlx/backend/cuda/kernels/cucomplex_math.cuh rename to mlx/backend/cuda/device/cucomplex_math.cuh diff --git a/mlx/backend/cuda/kernels/fp16_math.cuh b/mlx/backend/cuda/device/fp16_math.cuh similarity index 100% rename from mlx/backend/cuda/kernels/fp16_math.cuh rename to mlx/backend/cuda/device/fp16_math.cuh diff --git a/mlx/backend/cuda/kernels/unary_ops.cuh b/mlx/backend/cuda/device/unary_ops.cuh similarity index 98% rename from mlx/backend/cuda/kernels/unary_ops.cuh rename to mlx/backend/cuda/device/unary_ops.cuh index 6637a6eeb..af7c30e64 100644 --- a/mlx/backend/cuda/kernels/unary_ops.cuh +++ b/mlx/backend/cuda/device/unary_ops.cuh @@ -2,8 +2,8 @@ #pragma once -#include "mlx/backend/cuda/kernels/fp16_math.cuh" -#include "mlx/backend/cuda/kernels/utils.cuh" +#include "mlx/backend/cuda/device/fp16_math.cuh" +#include "mlx/backend/cuda/device/utils.cuh" namespace mlx::core::cu { diff --git a/mlx/backend/cuda/kernels/utils.cuh b/mlx/backend/cuda/device/utils.cuh similarity index 99% rename from mlx/backend/cuda/kernels/utils.cuh rename to mlx/backend/cuda/device/utils.cuh index e59095996..a1d387201 100644 --- a/mlx/backend/cuda/kernels/utils.cuh +++ b/mlx/backend/cuda/device/utils.cuh @@ -8,7 +8,7 @@ #pragma once -#include "mlx/backend/cuda/kernels/config.h" +#include "mlx/backend/cuda/device/config.h" #include #include diff --git a/mlx/backend/cuda/jit_module.h b/mlx/backend/cuda/jit_module.h index fcaa1fb3e..bbfaa45b0 100644 --- a/mlx/backend/cuda/jit_module.h +++ b/mlx/backend/cuda/jit_module.h @@ -4,7 +4,7 @@ #include "mlx/array.h" #include "mlx/backend/common/utils.h" -#include "mlx/backend/cuda/kernels/config.h" +#include "mlx/backend/cuda/device/config.h" #include #include diff --git a/mlx/backend/cuda/kernel_utils.cuh b/mlx/backend/cuda/kernel_utils.cuh index 656ddebea..7e957bbbd 100644 --- a/mlx/backend/cuda/kernel_utils.cuh +++ b/mlx/backend/cuda/kernel_utils.cuh @@ -1,13 +1,13 @@ // Copyright © 2025 Apple Inc. // This file includes host-only utilies for writing CUDA kernels, the difference -// from backend/cuda/kernels/utils.cuh is that the latter file only include +// from backend/cuda/device/utils.cuh is that the latter file only include // device-only code. #pragma once #include "mlx/array.h" -#include "mlx/backend/cuda/kernels/utils.cuh" +#include "mlx/backend/cuda/device/utils.cuh" #include #include diff --git a/mlx/backend/cuda/logsumexp.cu b/mlx/backend/cuda/logsumexp.cu index e539ac559..f57f82ea8 100644 --- a/mlx/backend/cuda/logsumexp.cu +++ b/mlx/backend/cuda/logsumexp.cu @@ -1,8 +1,8 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/cast_op.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/cast_op.cuh" #include "mlx/backend/gpu/copy.h" #include "mlx/dtype_utils.h" #include "mlx/primitives.h" diff --git a/mlx/backend/cuda/primitives.cu b/mlx/backend/cuda/primitives.cu index ded0d80c7..48b189626 100644 --- a/mlx/backend/cuda/primitives.cu +++ b/mlx/backend/cuda/primitives.cu @@ -1,9 +1,9 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/arange.cuh" +#include "mlx/backend/cuda/device/fp16_math.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/arange.cuh" -#include "mlx/backend/cuda/kernels/fp16_math.cuh" #include "mlx/distributed/primitives.h" #include "mlx/dtype_utils.h" #include "mlx/fast_primitives.h" diff --git a/mlx/backend/cuda/reduce/col_reduce.cu b/mlx/backend/cuda/reduce/col_reduce.cu index 1ca50d854..9911a6fe0 100644 --- a/mlx/backend/cuda/reduce/col_reduce.cu +++ b/mlx/backend/cuda/reduce/col_reduce.cu @@ -1,7 +1,7 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" -#include "mlx/backend/cuda/kernels/cast_op.cuh" +#include "mlx/backend/cuda/device/cast_op.cuh" #include "mlx/backend/cuda/reduce/reduce.cuh" #include diff --git a/mlx/backend/cuda/reduce/reduce.cuh b/mlx/backend/cuda/reduce/reduce.cuh index 0148022ab..a673e052e 100644 --- a/mlx/backend/cuda/reduce/reduce.cuh +++ b/mlx/backend/cuda/reduce/reduce.cuh @@ -1,8 +1,8 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/common/reduce.h" +#include "mlx/backend/cuda/device/cucomplex_math.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/cucomplex_math.cuh" #include "mlx/backend/cuda/reduce/reduce_ops.cuh" #include "mlx/dtype_utils.h" #include "mlx/primitives.h" diff --git a/mlx/backend/cuda/reduce/reduce_ops.cuh b/mlx/backend/cuda/reduce/reduce_ops.cuh index f06eb8541..832787222 100644 --- a/mlx/backend/cuda/reduce/reduce_ops.cuh +++ b/mlx/backend/cuda/reduce/reduce_ops.cuh @@ -2,7 +2,7 @@ #pragma once -#include "mlx/backend/cuda/kernels/utils.cuh" +#include "mlx/backend/cuda/device/utils.cuh" namespace mlx::core::cu { diff --git a/mlx/backend/cuda/reduce/row_reduce.cu b/mlx/backend/cuda/reduce/row_reduce.cu index 3a5c4a591..ae54a27d6 100644 --- a/mlx/backend/cuda/reduce/row_reduce.cu +++ b/mlx/backend/cuda/reduce/row_reduce.cu @@ -1,7 +1,7 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" -#include "mlx/backend/cuda/kernels/cast_op.cuh" +#include "mlx/backend/cuda/device/cast_op.cuh" #include "mlx/backend/cuda/reduce/reduce.cuh" #include diff --git a/mlx/backend/cuda/reduce/segmented_reduce.cu b/mlx/backend/cuda/reduce/segmented_reduce.cu index 563b056e4..114d71809 100644 --- a/mlx/backend/cuda/reduce/segmented_reduce.cu +++ b/mlx/backend/cuda/reduce/segmented_reduce.cu @@ -1,7 +1,7 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" -#include "mlx/backend/cuda/kernels/cast_op.cuh" +#include "mlx/backend/cuda/device/cast_op.cuh" #include "mlx/backend/cuda/reduce/reduce.cuh" #include diff --git a/mlx/backend/cuda/softmax.cu b/mlx/backend/cuda/softmax.cu index 605fc0df8..fc001ae75 100644 --- a/mlx/backend/cuda/softmax.cu +++ b/mlx/backend/cuda/softmax.cu @@ -1,9 +1,9 @@ // Copyright © 2025 Apple Inc. #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/cast_op.cuh" +#include "mlx/backend/cuda/device/fp16_math.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/cast_op.cuh" -#include "mlx/backend/cuda/kernels/fp16_math.cuh" #include "mlx/backend/gpu/copy.h" #include "mlx/dtype_utils.h" #include "mlx/primitives.h" diff --git a/mlx/backend/cuda/unary.cu b/mlx/backend/cuda/unary.cu index 0ee31ee28..f9d373455 100644 --- a/mlx/backend/cuda/unary.cu +++ b/mlx/backend/cuda/unary.cu @@ -2,10 +2,10 @@ #include "mlx/backend/common/unary.h" #include "mlx/backend/cuda/device.h" +#include "mlx/backend/cuda/device/cucomplex_math.cuh" +#include "mlx/backend/cuda/device/unary_ops.cuh" #include "mlx/backend/cuda/iterators/general_iterator.cuh" #include "mlx/backend/cuda/kernel_utils.cuh" -#include "mlx/backend/cuda/kernels/cucomplex_math.cuh" -#include "mlx/backend/cuda/kernels/unary_ops.cuh" #include "mlx/dtype_utils.h" #include "mlx/primitives.h"